@collection{monducci_insegnare_2018, location = {Torino}, edition = {3. ed}, title = {Insegnare storia: il laboratorio storico e altre pratiche attive}, isbn = {978-88-6008-533-7}, shorttitle = {Insegnare storia}, pagetotal = {328}, publisher = {{UTET} Università}, editor = {Monducci, Francesco}, date = {2018}, } @book{panciera_didattica_2013, location = {Firenze ; Milano}, edition = {3. ed. aggiornata}, title = {Didattica della storia: manuale per la formazione degli insegnanti}, isbn = {978-88-00-74494-2}, shorttitle = {Didattica della storia}, pagetotal = {232}, publisher = {Le Monnier Università : Mondadori Education}, author = {Panciera, Walter and Zannini, Andrea}, date = {2013}, } @article{aghaziarati_artificial_2023, title = {Artificial Intelligence in Education: Investigating Teacher Attitudes}, volume = {1}, url = {https://journals.kmanpub.com/index.php/aitechbesosci/article/view/1973}, doi = {10.61838/kman.aitech.1.1.6}, shorttitle = {Artificial Intelligence in Education}, abstract = {This study aims to investigate teachers' attitudes towards {AI} in education, focusing on identifying the perceived benefits, challenges, and ethical considerations associated with {AI} integration into teaching and learning environments. Utilizing a qualitative research design, this study conducted semi-structured interviews with 28 educators from various educational levels and disciplines. Thematic analysis was employed to analyze the interview data, identifying key themes and concepts related to teachers' perspectives on {AI} in education. Four main themes were identified: Pedagogical Impacts, Ethical and Social Considerations, Technological Challenges and Opportunities, and Perceptions of {AI} in Education. Pedagogical Impacts encompassed enhancing learning outcomes, curriculum integration, and the evolving roles of teachers. Ethical and Social Considerations highlighted concerns over data privacy, bias, and equity. Technological Challenges and Opportunities discussed integration challenges and the future of educational technology. Lastly, Perceptions of {AI} in Education revealed varied attitudes, awareness levels, and perceived impacts on professional identity. Teachers recognize the transformative potential of {AI} in enhancing personalized learning and operational efficiency. However, concerns about ethical issues, technological infrastructure, and the need for professional development are significant. Addressing these concerns requires targeted efforts from policymakers, educational leaders, and technologists to foster a supportive environment for {AI} integration in education.}, pages = {35--42}, number = {1}, journaltitle = {{AI} and Tech in Behavioral and Social Sciences}, shortjournal = {aitechbesosci}, author = {Aghaziarati, Ali and Nejatifar, Sara and Abedi, Ahmad}, urldate = {2025-01-26}, date = {2023}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\XKZNLLZ8\\Aghaziarati et al. 
- 2023 - Artificial Intelligence in Education Investigating Teacher Attitudes.pdf:application/pdf}, } @article{bonsu_consumers_2023, title = {From the Consumers’ Side: Determining Students’ Perception and Intention to Use {ChatGPT} in Ghanaian Higher Education}, issn = {1556-5068}, url = {https://www.ssrn.com/abstract=4387107}, doi = {10.2139/ssrn.4387107}, shorttitle = {From the Consumers’ Side}, journaltitle = {{SSRN} Electronic Journal}, shortjournal = {{SSRN} Journal}, author = {Bonsu, Emmanuel and Baffour-Koduah, Daniel}, urldate = {2025-01-26}, date = {2023}, langid = {english}, } @article{garcia_sanchez_uso_2023, title = {Uso y percepción de {ChatGPT} en la educación superior}, volume = {11}, issn = {23870893}, url = {https://riti.es/index.php/riti/article/view/261}, doi = {10.36825/RITI.11.23.009}, abstract = {This article aims to analyze higher education students' management and perception regarding using {ChatGPT} in their academic activities. To accomplish this, a descriptive study with a qualitative approach was employed to analyze the data obtained through a questionnaire administered to students from various majors at the Universidad Autónoma de Sinaloa. The instrument consisted of two sections with closed-ended questions and a Likert scale to measure the learners' perception. The results revealed that a minority of the respondents (33\%) had used {ChatGPT} in their school practices. Additionally, it was found that a significant proportion (75\%) did not consider the use of this tool suitable for their educational tasks, and a similar percentage (79\%) did not perceive improvements in their research and data analysis skills. A low dependence on using this tool for school assignments was observed (4\%), along with a lack of confidence in teachers' preparedness to effectively incorporate this technology into their classes (83\%). In conclusion, educational institutions are recommended to carefully consider integrating artificial intelligence tools in didactic exercises, taking into account the concerns expressed by the students.}, pages = {98--107}, number = {23}, journaltitle = {Revista de Investigación en Tecnologías de la Información}, shortjournal = {{RITI}}, author = {García Sánchez, Omar Vicente}, urldate = {2025-01-26}, date = {2023-06}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\3YYFJA8D\\García Sánchez - 2023 - Uso y percepción de ChatGPT en la educación superior.pdf:application/pdf}, } @misc{aguilar_critical_2024, title = {Critical Thinking and Ethics in the Age of Generative {AI} in Education}, rights = {https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode}, url = {https://osf.io/7dr9j}, doi = {10.35542/osf.io/7dr9j}, abstract = {This report is an invitation for educators, policymakers, technologists, and learners to consider how generative {AI} can contribute to the future of education. It aims to lay down a foundation upon which we can start building an educational ecosystem that is dynamic, inclusive, and profoundly human, despite being significantly aided by artificial intelligence.}, publisher = {{EdArXiv}}, author = {Aguilar, Stephen J and Swartout, William and Nye, Benjamin and Sinatra, Gale Marie and Wang, Changzhao and Bui, Eric}, urldate = {2025-01-26}, date = {2024-01-29}, file = {Versione inviata:C\:\\Users\\moles\\Zotero\\storage\\S4NTADLT\\Aguilar et al. 
- 2024 - Critical Thinking and Ethics in the Age of Generative AI in Education.pdf:application/pdf}, } @article{markauskaite_rethinking_2022, title = {Rethinking the entwinement between artificial intelligence and human learning: What capabilities do learners need for a world with {AI}?}, volume = {3}, issn = {2666920X}, url = {https://linkinghub.elsevier.com/retrieve/pii/S2666920X2200011X}, doi = {10.1016/j.caeai.2022.100056}, shorttitle = {Rethinking the entwinement between artificial intelligence and human learning}, pages = {100056}, journaltitle = {Computers and Education: Artificial Intelligence}, shortjournal = {Computers and Education: Artificial Intelligence}, author = {Markauskaite, Lina and Marrone, Rebecca and Poquet, Oleksandra and Knight, Simon and Martinez-Maldonado, Roberto and Howard, Sarah and Tondeur, Jo and De Laat, Maarten and Buckingham Shum, Simon and Gašević, Dragan and Siemens, George}, urldate = {2025-01-26}, date = {2022}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\ZJYQKP2V\\Markauskaite et al. - 2022 - Rethinking the entwinement between artificial intelligence and human learning What capabilities do.pdf:application/pdf}, } @misc{xu_hallucination_2024, title = {Hallucination is Inevitable: An Innate Limitation of Large Language Models}, rights = {{arXiv}.org perpetual, non-exclusive license}, url = {https://arxiv.org/abs/2401.11817}, doi = {10.48550/ARXIV.2401.11817}, shorttitle = {Hallucination is Inevitable}, abstract = {Hallucination has been widely recognized to be a significant drawback for large language models ({LLMs}). There have been many works that attempt to reduce the extent of hallucination. These efforts have mostly been empirical so far, which cannot answer the fundamental question whether it can be completely eliminated. In this paper, we formalize the problem and show that it is impossible to eliminate hallucination in {LLMs}. Specifically, we define a formal world where hallucination is defined as inconsistencies between a computable {LLM} and a computable ground truth function. By employing results from learning theory, we show that {LLMs} cannot learn all of the computable functions and will therefore always hallucinate. Since the formal world is a part of the real world which is much more complicated, hallucinations are also inevitable for real world {LLMs}. Furthermore, for real world {LLMs} constrained by provable time complexity, we describe the hallucination-prone tasks and empirically validate our claims. 
Finally, using the formal world framework, we discuss the possible mechanisms and efficacies of existing hallucination mitigators as well as the practical implications on the safe deployment of {LLMs}.}, publisher = {{arXiv}}, author = {Xu, Ziwei and Jain, Sanjay and Kankanhalli, Mohan}, urldate = {2025-01-26}, date = {2024}, note = {Version Number: 1}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG}), Artificial Intelligence (cs.{AI})}, } @article{chan_students_2023, title = {Students’ voices on generative {AI}: perceptions, benefits, and challenges in higher education}, volume = {20}, issn = {2365-9440}, url = {https://educationaltechnologyjournal.springeropen.com/articles/10.1186/s41239-023-00411-8}, doi = {10.1186/s41239-023-00411-8}, shorttitle = {Students’ voices on generative {AI}}, abstract = {Abstract This study explores university students’ perceptions of generative {AI} ({GenAI}) technologies, such as {ChatGPT}, in higher education, focusing on familiarity, their willingness to engage, potential benefits and challenges, and effective integration. A survey of 399 undergraduate and postgraduate students from various disciplines in Hong Kong revealed a generally positive attitude towards {GenAI} in teaching and learning. Students recognized the potential for personalized learning support, writing and brainstorming assistance, and research and analysis capabilities. However, concerns about accuracy, privacy, ethical issues, and the impact on personal development, career prospects, and societal values were also expressed. According to John Biggs’ 3P model, student perceptions significantly influence learning approaches and outcomes. By understanding students’ perceptions, educators and policymakers can tailor {GenAI} technologies to address needs and concerns while promoting effective learning outcomes. Insights from this study can inform policy development around the integration of {GenAI} technologies into higher education. By understanding students’ perceptions and addressing their concerns, policymakers can create well-informed guidelines and strategies for the responsible and effective implementation of {GenAI} tools, ultimately enhancing teaching and learning experiences in higher education.}, pages = {43}, number = {1}, journaltitle = {International Journal of Educational Technology in Higher Education}, shortjournal = {Int J Educ Technol High Educ}, author = {Chan, Cecilia Ka Yuk and Hu, Wenjie}, urldate = {2025-01-26}, date = {2023-07-17}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\XEBCQGBK\\Chan e Hu - 2023 - Students’ voices on generative AI perceptions, benefits, and challenges in higher education.pdf:application/pdf}, } @misc{dao_investigating_2023, title = {Investigating the Effectiveness of {ChatGPT} in Mathematical Reasoning and Problem Solving: Evidence from the Vietnamese National High School Graduation Examination}, rights = {Creative Commons Attribution Share Alike 4.0 International}, url = {https://arxiv.org/abs/2306.06331}, doi = {10.48550/ARXIV.2306.06331}, shorttitle = {Investigating the Effectiveness of {ChatGPT} in Mathematical Reasoning and Problem Solving}, abstract = {This study offers a complete analysis of {ChatGPT}'s mathematics abilities in responding to multiple-choice questions for the Vietnamese National High School Graduation Examination ({VNHSGE}) on a range of subjects and difficulty levels. 
The dataset included 250 questions divided into four levels: knowledge (K), comprehension (C), application (A), and high application (H), and it included ten themes that covered diverse mathematical concepts. The outcomes demonstrate that {ChatGPT}'s performance varies depending on the difficulty level and subject. It performed best on questions at Level (K), with an accuracy rate of \$83{\textbackslash}\%\$; but, as the difficulty level rose, it scored poorly, with an accuracy rate of \$10{\textbackslash}\%\$. The study has also shown that {ChatGPT} significantly succeeds in providing responses to questions on subjects including exponential and logarithmic functions, geometric progression, and arithmetic progression. The study found that {ChatGPT} had difficulty correctly answering questions on topics including derivatives and applications, spatial geometry, and Oxyz spatial calculus. Additionally, this study contrasted {ChatGPT} outcomes with Vietnamese students in {VNHSGE} and in other math competitions. {ChatGPT} dominated in the {SAT} Math competition with a success rate of \$70{\textbackslash}\%\$, followed by {VNHSGE} mathematics (\$58.8{\textbackslash}\%)\$. However, its success rates were lower on other exams, such as {AP} Statistics, the {GRE} Quantitative, {AMC} 10, {AMC} 12, and {AP} Calculus {BC}. These results suggest that {ChatGPT} has the potential to be an effective teaching tool for mathematics, but more work is needed to enhance its handling of graphical data and address the challenges presented by questions that are getting more challenging.}, publisher = {{arXiv}}, author = {Dao, Xuan-Quy and Le, Ngoc-Bich}, urldate = {2025-01-26}, date = {2023}, note = {Version Number: 3}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})}, } @misc{hsieh_ruler_2024, title = {{RULER}: What's the Real Context Size of Your Long-Context Language Models?}, rights = {Creative Commons Attribution 4.0 International}, url = {https://arxiv.org/abs/2404.06654}, doi = {10.48550/ARXIV.2404.06654}, shorttitle = {{RULER}}, abstract = {The needle-in-a-haystack ({NIAH}) test, which examines the ability to retrieve a piece of information (the "needle") from long distractor texts (the "haystack"), has been widely adopted to evaluate long-context language models ({LMs}). However, this simple retrieval-based test is indicative of only a superficial form of long-context understanding. To provide a more comprehensive evaluation of long-context {LMs}, we create a new synthetic benchmark {RULER} with flexible configurations for customized sequence length and task complexity. {RULER} expands upon the vanilla {NIAH} test to encompass variations with diverse types and quantities of needles. Moreover, {RULER} introduces new task categories multi-hop tracing and aggregation to test behaviors beyond searching from context. We evaluate 17 long-context {LMs} with 13 representative tasks in {RULER}. Despite achieving nearly perfect accuracy in the vanilla {NIAH} test, almost all models exhibit large performance drops as the context length increases. While these models all claim context sizes of 32K tokens or greater, only half of them can maintain satisfactory performance at the length of 32K. Our analysis of Yi-34B, which supports context length of 200K, reveals large room for improvement as we increase input length and task complexity. 
We open source {RULER} to spur comprehensive evaluation of long-context {LMs}.}, publisher = {{arXiv}}, author = {Hsieh, Cheng-Ping and Sun, Simeng and Kriman, Samuel and Acharya, Shantanu and Rekesh, Dima and Jia, Fei and Zhang, Yang and Ginsburg, Boris}, urldate = {2025-01-26}, date = {2024}, note = {Version Number: 3}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences}, } @misc{kojima_large_2022, title = {Large Language Models are Zero-Shot Reasoners}, rights = {{arXiv}.org perpetual, non-exclusive license}, url = {https://arxiv.org/abs/2205.11916}, doi = {10.48550/ARXIV.2205.11916}, abstract = {Pretrained large language models ({LLMs}) are widely used in many sub-fields of natural language processing ({NLP}) and generally known as excellent few-shot learners with task-specific exemplars. Notably, chain of thought ({CoT}) prompting, a recent technique for eliciting complex multi-step reasoning through step-by-step answer examples, achieved the state-of-the-art performances in arithmetics and symbolic reasoning, difficult system-2 tasks that do not follow the standard scaling laws for {LLMs}. While these successes are often attributed to {LLMs}' ability for few-shot learning, we show that {LLMs} are decent zero-shot reasoners by simply adding "Let's think step by step" before each answer. Experimental results demonstrate that our Zero-shot-{CoT}, using the same single prompt template, significantly outperforms zero-shot {LLM} performances on diverse benchmark reasoning tasks including arithmetics ({MultiArith}, {GSM}8K, {AQUA}-{RAT}, {SVAMP}), symbolic reasoning (Last Letter, Coin Flip), and other logical reasoning tasks (Date Understanding, Tracking Shuffled Objects), without any hand-crafted few-shot examples, e.g. increasing the accuracy on {MultiArith} from 17.7\% to 78.7\% and {GSM}8K from 10.4\% to 40.7\% with large {InstructGPT} model (text-davinci-002), as well as similar magnitudes of improvements with another off-the-shelf large model, 540B parameter {PaLM}. The versatility of this single prompt across very diverse reasoning tasks hints at untapped and understudied fundamental zero-shot capabilities of {LLMs}, suggesting high-level, multi-task broad cognitive capabilities may be extracted by simple prompting. 
We hope our work not only serves as the minimal strongest zero-shot baseline for the challenging reasoning benchmarks, but also highlights the importance of carefully exploring and analyzing the enormous zero-shot knowledge hidden inside {LLMs} before crafting finetuning datasets or few-shot exemplars.}, publisher = {{arXiv}}, author = {Kojima, Takeshi and Gu, Shixiang Shane and Reid, Machel and Matsuo, Yutaka and Iwasawa, Yusuke}, urldate = {2025-01-26}, date = {2022}, note = {Version Number: 4}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG}), Artificial Intelligence (cs.{AI})}, } @inproceedings{liu_generated_2022, location = {Dublin, Ireland}, title = {Generated Knowledge Prompting for Commonsense Reasoning}, url = {https://aclanthology.org/2022.acl-long.225}, doi = {10.18653/v1/2022.acl-long.225}, eventtitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, pages = {3154--3169}, booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, publisher = {Association for Computational Linguistics}, author = {Liu, Jiacheng and Liu, Alisa and Lu, Ximing and Welleck, Sean and West, Peter and Le Bras, Ronan and Choi, Yejin and Hajishirzi, Hannaneh}, urldate = {2025-01-26}, date = {2022}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\Y2AZTRPW\\Liu et al. - 2022 - Generated Knowledge Prompting for Commonsense Reasoning.pdf:application/pdf}, } @article{makrygiannakis_evidence-based_2024, title = {Evidence-based potential of generative artificial intelligence large language models in orthodontics: a comparative study of {ChatGPT}, Google Bard, and Microsoft Bing}, rights = {https://creativecommons.org/licenses/by/4.0/}, issn = {0141-5387, 1460-2210}, url = {https://academic.oup.com/ejo/advance-article/doi/10.1093/ejo/cjae017/7645326}, doi = {10.1093/ejo/cjae017}, shorttitle = {Evidence-based potential of generative artificial intelligence large language models in orthodontics}, abstract = {Summary Background The increasing utilization of large language models ({LLMs}) in Generative Artificial Intelligence across various medical and dental fields, and specifically orthodontics, raises questions about their accuracy. Objective This study aimed to assess and compare the answers offered by four {LLMs}: Google’s Bard, {OpenAI}’s {ChatGPT}-3.5, and {ChatGPT}-4, and Microsoft’s Bing, in response to clinically relevant questions within the field of orthodontics. Materials and methods Ten open-type clinical orthodontics-related questions were posed to the {LLMs}. The responses provided by the {LLMs} were assessed on a scale ranging from 0 (minimum) to 10 (maximum) points, benchmarked against robust scientific evidence, including consensus statements and systematic reviews, using a predefined rubric. After a 4-week interval from the initial evaluation, the answers were reevaluated to gauge intra-evaluator reliability. Statistical comparisons were conducted on the scores using Friedman’s and Wilcoxon’s tests to identify the model providing the answers with the most comprehensiveness, scientific accuracy, clarity, and relevance. Results Overall, no statistically significant differences between the scores given by the two evaluators, on both scoring occasions, were detected, so an average score for every {LLM} was computed. 
The {LLM} answers scoring the highest were those of Microsoft Bing Chat (average score = 7.1), followed by {ChatGPT}-4 (average score = 4.7), Google Bard (average score = 4.6), and finally {ChatGPT}-3.5 (average score = 3.8). While Microsoft Bing Chat statistically outperformed {ChatGPT}-3.5 (P-value = 0.017) and Google Bard (P-value = 0.029), and {ChatGPT}-4 outperformed {ChatGPT}-3.5 (P-value = 0.011), all models occasionally produced answers with a lack of comprehensiveness, scientific accuracy, clarity, and relevance. Limitations The questions asked were indicative and did not cover the entire field of orthodontics. Conclusions Language models ({LLMs}) show great potential in supporting evidence-based orthodontics. However, their current limitations pose a potential risk of making incorrect healthcare decisions if utilized without careful consideration. Consequently, these tools cannot serve as a substitute for the orthodontist’s essential critical thinking and comprehensive subject knowledge. For effective integration into practice, further research, clinical validation, and enhancements to the models are essential. Clinicians must be mindful of the limitations of {LLMs}, as their imprudent utilization could have adverse effects on patient care.}, pages = {cjae017}, journaltitle = {European Journal of Orthodontics}, author = {Makrygiannakis, Miltiadis A and Giannakopoulos, Kostis and Kaklamanos, Eleftherios G}, urldate = {2025-01-26}, date = {2024-04-13}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\IQIANDKV\\Makrygiannakis et al. - 2024 - Evidence-based potential of generative artificial intelligence large language models in orthodontics.pdf:application/pdf}, } @misc{min_rethinking_2022, title = {Rethinking the Role of Demonstrations: What Makes In-Context Learning Work?}, rights = {Creative Commons Attribution 4.0 International}, url = {https://arxiv.org/abs/2202.12837}, doi = {10.48550/ARXIV.2202.12837}, shorttitle = {Rethinking the Role of Demonstrations}, abstract = {Large language models ({LMs}) are able to in-context learn -- perform a new task via inference alone by conditioning on a few input-label pairs (demonstrations) and making predictions for new inputs. However, there has been little understanding of how the model learns and which aspects of the demonstrations contribute to end task performance. In this paper, we show that ground truth demonstrations are in fact not required -- randomly replacing labels in the demonstrations barely hurts performance on a range of classification and multi-choice tasks, consistently over 12 different models including {GPT}-3. Instead, we find that other aspects of the demonstrations are the key drivers of end task performance, including the fact that they provide a few examples of (1) the label space, (2) the distribution of the input text, and (3) the overall format of the sequence. 
Together, our analysis provides a new way of understanding how and why in-context learning works, while opening up new questions about how much can be learned from large language models through inference alone.}, publisher = {{arXiv}}, author = {Min, Sewon and Lyu, Xinxi and Holtzman, Ari and Artetxe, Mikel and Lewis, Mike and Hajishirzi, Hannaneh and Zettlemoyer, Luke}, urldate = {2025-01-26}, date = {2022}, note = {Version Number: 2}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Artificial Intelligence (cs.{AI})}, } @article{stribling_model_2024, title = {The model student: {GPT}-4 performance on graduate biomedical science exams}, volume = {14}, issn = {2045-2322}, url = {https://www.nature.com/articles/s41598-024-55568-7}, doi = {10.1038/s41598-024-55568-7}, shorttitle = {The model student}, abstract = {Abstract The {GPT}-4 large language model ({LLM}) and {ChatGPT} chatbot have emerged as accessible and capable tools for generating English-language text in a variety of formats. {GPT}-4 has previously performed well when applied to questions from multiple standardized examinations. However, further evaluation of trustworthiness and accuracy of {GPT}-4 responses across various knowledge domains is essential before its use as a reference resource. Here, we assess {GPT}-4 performance on nine graduate-level examinations in the biomedical sciences (seven blinded), finding that {GPT}-4 scores exceed the student average in seven of nine cases and exceed all student scores for four exams. {GPT}-4 performed very well on fill-in-the-blank, short-answer, and essay questions, and correctly answered several questions on figures sourced from published manuscripts. Conversely, {GPT}-4 performed poorly on questions with figures containing simulated data and those requiring a hand-drawn answer. Two {GPT}-4 answer-sets were flagged as plagiarism based on answer similarity and some model responses included detailed hallucinations. In addition to assessing {GPT}-4 performance, we discuss patterns and limitations in {GPT}-4 capabilities with the goal of informing design of future academic examinations in the chatbot era.}, pages = {5670}, number = {1}, journaltitle = {Scientific Reports}, shortjournal = {Sci Rep}, author = {Stribling, Daniel and Xia, Yuxing and Amer, Maha K. and Graim, Kiley S. and Mulligan, Connie J. and Renne, Rolf}, urldate = {2025-01-26}, date = {2024-03-07}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\P9FTCLI4\\Stribling et al. - 2024 - The model student GPT-4 performance on graduate biomedical science exams.pdf:application/pdf}, } @misc{vaswani_attention_2017, title = {Attention Is All You Need}, rights = {{arXiv}.org perpetual, non-exclusive license}, url = {https://arxiv.org/abs/1706.03762}, doi = {10.48550/ARXIV.1706.03762}, abstract = {The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. 
Our model achieves 28.4 {BLEU} on the {WMT} 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 {BLEU}. On the {WMT} 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art {BLEU} score of 41.8 after training for 3.5 days on eight {GPUs}, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.}, publisher = {{arXiv}}, author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Lukasz and Polosukhin, Illia}, urldate = {2025-01-26}, date = {2017}, note = {Version Number: 7}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})}, } @misc{wang_searching_2024, title = {Searching for Best Practices in Retrieval-Augmented Generation}, rights = {Creative Commons Attribution 4.0 International}, url = {https://arxiv.org/abs/2407.01219}, doi = {10.48550/ARXIV.2407.01219}, abstract = {Retrieval-augmented generation ({RAG}) techniques have proven to be effective in integrating up-to-date information, mitigating hallucinations, and enhancing response quality, particularly in specialized domains. While many {RAG} approaches have been proposed to enhance large language models through query-dependent retrievals, these approaches still suffer from their complex implementation and prolonged response times. Typically, a {RAG} workflow involves multiple processing steps, each of which can be executed in various ways. Here, we investigate existing {RAG} approaches and their potential combinations to identify optimal {RAG} practices. Through extensive experiments, we suggest several strategies for deploying {RAG} that balance both performance and efficiency. Moreover, we demonstrate that multimodal retrieval techniques can significantly enhance question-answering capabilities about visual inputs and accelerate the generation of multimodal content using a "retrieval as generation" strategy.}, publisher = {{arXiv}}, author = {Wang, Xiaohua and Wang, Zhenghua and Gao, Xuan and Zhang, Feiran and Wu, Yixin and Xu, Zhibo and Shi, Tianyuan and Wang, Zhengyuan and Li, Shizheng and Qian, Qi and Yin, Ruicheng and Lv, Changze and Zheng, Xiaoqing and Huang, Xuanjing}, urldate = {2025-01-26}, date = {2024}, note = {Version Number: 1}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences}, } @misc{wei_chain--thought_2022, title = {Chain-of-Thought Prompting Elicits Reasoning in Large Language Models}, rights = {Creative Commons Attribution 4.0 International}, url = {https://arxiv.org/abs/2201.11903}, doi = {10.48550/ARXIV.2201.11903}, abstract = {We explore how generating a chain of thought -- a series of intermediate reasoning steps -- significantly improves the ability of large language models to perform complex reasoning. In particular, we show how such reasoning abilities emerge naturally in sufficiently large language models via a simple method called chain of thought prompting, where a few chain of thought demonstrations are provided as exemplars in prompting. Experiments on three large language models show that chain of thought prompting improves performance on a range of arithmetic, commonsense, and symbolic reasoning tasks. The empirical gains can be striking. 
For instance, prompting a 540B-parameter language model with just eight chain of thought exemplars achieves state of the art accuracy on the {GSM}8K benchmark of math word problems, surpassing even finetuned {GPT}-3 with a verifier.}, publisher = {{arXiv}}, author = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Ichter, Brian and Xia, Fei and Chi, Ed and Le, Quoc and Zhou, Denny}, urldate = {2025-01-26}, date = {2022}, note = {Version Number: 6}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Artificial Intelligence (cs.{AI})}, } @article{yu_reflection_2023, title = {Reflection on whether Chat {GPT} should be banned by academia from the perspective of education and teaching}, volume = {14}, issn = {1664-1078}, url = {https://www.frontiersin.org/articles/10.3389/fpsyg.2023.1181712/full}, doi = {10.3389/fpsyg.2023.1181712}, pages = {1181712}, journaltitle = {Frontiers in Psychology}, shortjournal = {Front. Psychol.}, author = {Yu, Hao}, urldate = {2025-01-26}, date = {2023-06-01}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\Y9IJ5TPF\\Yu - 2023 - Reflection on whether Chat GPT should be banned by academia from the perspective of education and te.pdf:application/pdf}, } @misc{shi_detecting_2023, title = {Detecting Pretraining Data from Large Language Models}, rights = {Creative Commons Attribution 4.0 International}, url = {https://arxiv.org/abs/2310.16789}, doi = {10.48550/ARXIV.2310.16789}, abstract = {Although large language models ({LLMs}) are widely deployed, the data used to train them is rarely disclosed. Given the incredible scale of this data, up to trillions of tokens, it is all but certain that it includes potentially problematic text such as copyrighted materials, personally identifiable information, and test data for widely reported reference benchmarks. However, we currently have no way to know which data of these types is included or in what proportions. In this paper, we study the pretraining data detection problem: given a piece of text and black-box access to an {LLM} without knowing the pretraining data, can we determine if the model was trained on the provided text? To facilitate this study, we introduce a dynamic benchmark {WIKIMIA} that uses data created before and after model training to support gold truth detection. We also introduce a new detection method Min-K\% Prob based on a simple hypothesis: an unseen example is likely to contain a few outlier words with low probabilities under the {LLM}, while a seen example is less likely to have words with such low probabilities. Min-K\% Prob can be applied without any knowledge about the pretraining corpus or any additional training, departing from previous detection methods that require training a reference model on data that is similar to the pretraining data. Moreover, our experiments demonstrate that Min-K\% Prob achieves a 7.4\% improvement on {WIKIMIA} over these previous methods. 
We apply Min-K\% Prob to three real-world scenarios, copyrighted book detection, contaminated downstream example detection and privacy auditing of machine unlearning, and find it a consistently effective solution.}, publisher = {{arXiv}}, author = {Shi, Weijia and Ajith, Anirudh and Xia, Mengzhou and Huang, Yangsibo and Liu, Daogao and Blevins, Terra and Chen, Danqi and Zettlemoyer, Luke}, urldate = {2025-01-26}, date = {2023}, note = {Version Number: 3}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG}), Cryptography and Security (cs.{CR})}, } @article{fernandes_ai_2024, title = {{AI} Training and Copyright: Should Intellectual Property Law Allow Machines to Learn?}, volume = {10}, rights = {http://creativecommons.org/licenses/by/4.0}, issn = {2653-8660}, url = {https://ejournals.epublishing.ekt.gr/index.php/bioethica/article/view/39041}, doi = {10.12681/bioeth.39041}, shorttitle = {{AI} Training and Copyright}, abstract = {This article examines the intricate legal landscape surrounding the use of copyrighted materials in the development of artificial intelligence ({AI}). It explores the rise of {AI} and its reliance on data, emphasizing the importance of data availability for machine learning ({ML}) systems. The article analyzes current relevant legislation across the European Union, United States, and Japan, highlighting the legal ambiguities and constraints posed by {IP} rights, particularly copyright. It discusses possible new solutions, referencing the World Intellectual Property Organization's ({WIPO}) call for discussions on {AI} and {IP} policy. The conclusion stresses the need to balance the interests of {AI} developers and {IP} rights holders to promote technological advancement while safeguarding creativity and originality.}, pages = {8--21}, number = {2}, journaltitle = {Bioethica}, shortjournal = {Bioethica}, author = {Fernandes, Pedro Martins}, urldate = {2025-01-26}, date = {2024-10-01}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\9HW4C4Z6\\Fernandes - 2024 - AI Training and Copyright Should Intellectual Property Law Allow Machines to Learn.pdf:application/pdf}, } @article{buick_copyright_2024, title = {Copyright and {AI} training data—transparency to the rescue?}, rights = {https://creativecommons.org/licenses/by-nc-nd/4.0/}, issn = {1747-1532, 1747-1540}, url = {https://academic.oup.com/jiplp/advance-article/doi/10.1093/jiplp/jpae102/7922541}, doi = {10.1093/jiplp/jpae102}, abstract = {Abstract Generative Artificial Intelligence ({AI}) models must be trained on vast quantities of data, much of which is composed of copyrighted material. However, {AI} developers frequently use such content without seeking permission from rightsholders, leading to calls for requirements to disclose information on the contents of {AI} training data. These demands have won an early success through the inclusion of such requirements in the {EU}’s {AI} Act. This article argues that such transparency requirements alone cannot rescue us from the difficult question of how best to respond to the fundamental challenges generative {AI} poses to copyright law. This is because the impact of transparency requirements is contingent on existing copyright laws; if these do not adequately address the challenges presented by generative {AI}, transparency will not provide a solution. 
This is exemplified by the transparency requirements of the {AI} Act, which are explicitly designed to facilitate the enforcement of the right to opt-out of text and data mining under the Copyright in the Digital Single Market Directive. Because the transparency requirements do not sufficiently address the underlying flaws of this opt-out, they are unlikely to provide any meaningful improvement to the position of individual rightsholders. Transparency requirements are thus a necessary but not sufficient measure to achieve a fair and equitable balance between innovation and protection for rightsholders. Policymakers must therefore look beyond such requirements and consider further action to address the complex challenge presented to copyright law by generative {AI}.}, pages = {jpae102}, journaltitle = {Journal Of Intellectual Property Law and Practice}, author = {Buick, Adam}, urldate = {2025-01-26}, date = {2024-12-12}, langid = {english}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\KANHFKHS\\Buick - 2024 - Copyright and AI training data—transparency to the rescue.pdf:application/pdf}, } @misc{azizy_adversarial_2024, title = {Adversarial vulnerability following {LLM} low-resource language fine-tuning: Short report}, rights = {https://creativecommons.org/licenses/by/4.0/legalcode}, url = {https://osf.io/bzd6w}, doi = {10.31219/osf.io/bzd6w}, shorttitle = {Adversarial vulnerability following {LLM} low-resource language fine-tuning}, abstract = {We briefly report how fine-tuning a multilingual {LLM} with a low-resource language resulted in an increased vulnerability to adversarial attacks. We fine-tuned {GPT}-3.5 Turbo (gpt-3.5-0125) with 560 input-output pairs ({\textasciitilde}274k tokens) of Krama Javanese, a high register of Javanese (a low-resource language). We report brief qualitative and quantitative observations that 1. The fine-tuned model is more compliant towards adversarial prompts, 2. Unsuccessful prompts can be successful when concatenated with an elaboration string, e.g., step-by-step prompting or by specifying details, 3. The model can be prompted in the fine-tuned language to respond in English, thus providing a way to produce harmful responses in a different language. The fine-tuned model sees a 45.1\% increase of {GPT}-4-rated sum of harmfulness for Krama Javanese responses and a 13.8\% increase for English responses. Notably, all of these vulnerabilities can be reached very effectively with the benign nature and our small dataset size. 
Our work contributes knowledge in the intersection of {AI} safety and multilingual models, indicating that fine-tuning an {LLM} on a low-resource language should include additional data examples for retaining safety guardrails.}, publisher = {Open Science Framework}, author = {Azizy, Afrizal Hasbi and Cahyanto, Nuel Bagus}, urldate = {2025-01-26}, date = {2024-05-03}, file = {Versione inviata:C\:\\Users\\moles\\Zotero\\storage\\X9DLH5RF\\Azizy e Cahyanto - 2024 - Adversarial vulnerability following LLM low-resource language fine-tuning Short report.pdf:application/pdf}, } @incollection{kucharavy_exploring_2024, location = {Cham}, title = {Exploring the Dual Role of {LLMs} in Cybersecurity: Threats and Defenses}, isbn = {978-3-031-54826-0 978-3-031-54827-7}, url = {https://link.springer.com/10.1007/978-3-031-54827-7_26}, shorttitle = {Exploring the Dual Role of {LLMs} in Cybersecurity}, abstract = {Abstract Large Language Models ({LLMs}) pose risks for cybersecurity since they facilitate minimal cost creation of malware, phishing messages, and malicious chatbots. At the same time, {LLMs} can help defend against cyberattacks. This chapter reviews security research around the risks and benefits of {LLMs}.}, pages = {235--242}, booktitle = {Large Language Models in Cybersecurity}, publisher = {Springer Nature Switzerland}, author = {Bryce, Ciarán and Kalousis, Alexandros and Leroux, Ilan and Madinier, Hélène and Pasche, Thomas and Ruch, Patrick}, editor = {Kucharavy, Andrei and Plancherel, Octave and Mulder, Valentin and Mermoud, Alain and Lenders, Vincent}, urldate = {2025-01-26}, date = {2024}, langid = {english}, doi = {10.1007/978-3-031-54827-7_26}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\X6FST7PE\\Bryce et al. - 2024 - Exploring the Dual Role of LLMs in Cybersecurity Threats and Defenses.pdf:application/pdf}, } @article{gupta_chatgpt_2023, title = {From {ChatGPT} to {ThreatGPT}: Impact of Generative {AI} in Cybersecurity and Privacy}, volume = {11}, rights = {https://creativecommons.org/licenses/by-nc-nd/4.0/}, issn = {2169-3536}, url = {https://ieeexplore.ieee.org/document/10198233/}, doi = {10.1109/ACCESS.2023.3300381}, shorttitle = {From {ChatGPT} to {ThreatGPT}}, pages = {80218--80245}, journaltitle = {{IEEE} Access}, shortjournal = {{IEEE} Access}, author = {Gupta, Maanak and Akiri, Charankumar and Aryal, Kshitiz and Parker, Eli and Praharaj, Lopamudra}, urldate = {2025-01-26}, date = {2023}, file = {Versione inviata:C\:\\Users\\moles\\Zotero\\storage\\4VMZCSZK\\Gupta et al. - 2023 - From ChatGPT to ThreatGPT Impact of Generative AI in Cybersecurity and Privacy.pdf:application/pdf}, } @misc{mahato_red_2024, title = {Red Teaming for Multimodal Large Language Models: A Survey}, rights = {https://creativecommons.org/licenses/by/4.0/}, url = {https://www.techrxiv.org/users/717031/articles/701792-red-teaming-for-multimodal-large-language-models-a-survey?commit=78974318f47d2573ffe2d51622dee3c7268dddd5}, doi = {10.36227/techrxiv.170629758.87975697/v1}, shorttitle = {Red Teaming for Multimodal Large Language Models}, publisher = {Preprints}, author = {Mahato, Moushumi and Kumar, Avinash and Singh, Kartikey and Kukreja, Bhavesh and Nabi, Javaid}, urldate = {2025-01-26}, date = {2024-01-26}, file = {Versione inviata:C\:\\Users\\moles\\Zotero\\storage\\UB8SAQWI\\Mahato et al. 
- 2024 - Red Teaming for Multimodal Large Language Models A Survey.pdf:application/pdf}, } @article{menz_health_2024, title = {Health Disinformation Use Case Highlighting the Urgent Need for Artificial Intelligence Vigilance: Weapons of Mass Disinformation}, volume = {184}, issn = {2168-6106}, url = {https://jamanetwork.com/journals/jamainternalmedicine/fullarticle/2811333}, doi = {10.1001/jamainternmed.2023.5947}, shorttitle = {Health Disinformation Use Case Highlighting the Urgent Need for Artificial Intelligence Vigilance}, abstract = {Importance Although artificial intelligence ({AI}) offers many promises across modern medicine, it may carry a significant risk for the mass generation of targeted health disinformation. This poses an urgent threat toward public health initiatives and calls for rapid attention by health care professionals, {AI} developers, and regulators to ensure public safety. Observations As an example, using a single publicly available large-language model, within 65 minutes, 102 distinct blog articles were generated that contained more than 17 000 words of disinformation related to vaccines and vaping. Each post was coercive and targeted at diverse societal groups, including young adults, young parents, older persons, pregnant people, and those with chronic health conditions. The blogs included fake patient and clinician testimonials and obeyed prompting for the inclusion of scientific-looking referencing. Additional generative {AI} tools created an accompanying 20 realistic images in less than 2 minutes. This process was undertaken by health care professionals and researchers with no specialized knowledge in bypassing {AI} guardrails, relying solely on publicly available information. Conclusions and Relevance These observations demonstrate that when the guardrails of {AI} tools are insufficient, the ability to rapidly generate diverse and large amounts of convincing disinformation is profound. Beyond providing 2 example scenarios, these findings demonstrate an urgent need for robust {AI} vigilance. The {AI} tools are rapidly progressing; alongside these advancements, emergent risks are becoming increasingly apparent. Key pillars of pharmacovigilance—including transparency, surveillance, and regulation—may serve as valuable examples for managing these risks and safeguarding public health.}, pages = {92}, number = {1}, journaltitle = {{JAMA} Internal Medicine}, shortjournal = {{JAMA} Intern Med}, author = {Menz, Bradley D. and Modi, Natansh D. and Sorich, Michael J. and Hopkins, Ashley M.}, urldate = {2025-01-26}, date = {2024-01-01}, langid = {english}, } @article{qi_visual_2024, title = {Visual Adversarial Examples Jailbreak Aligned Large Language Models}, volume = {38}, issn = {2374-3468, 2159-5399}, url = {https://ojs.aaai.org/index.php/AAAI/article/view/30150}, doi = {10.1609/aaai.v38i19.30150}, abstract = {Warning: this paper contains data, prompts, and model outputs that are offensive in nature. Recently, there has been a surge of interest in integrating vision into Large Language Models ({LLMs}), exemplified by Visual Language Models ({VLMs}) such as Flamingo and {GPT}-4. This paper sheds light on the security and safety implications of this trend. First, we underscore that the continuous and high-dimensional nature of the visual input makes it a weak link against adversarial attacks, representing an expanded attack surface of vision-integrated {LLMs}. 
Second, we highlight that the versatility of {LLMs} also presents visual attackers with a wider array of achievable adversarial objectives, extending the implications of security failures beyond mere misclassification. As an illustration, we present a case study in which we exploit visual adversarial examples to circumvent the safety guardrail of aligned {LLMs} with integrated vision. Intriguingly, we discover that a single visual adversarial example can universally jailbreak an aligned {LLM}, compelling it to heed a wide range of harmful instructions (that it otherwise would not) and generate harmful content that transcends the narrow scope of a `few-shot' derogatory corpus initially employed to optimize the adversarial example. Our study underscores the escalating adversarial risks associated with the pursuit of multimodality. Our findings also connect the long-studied adversarial vulnerabilities of neural networks to the nascent field of {AI} alignment. The presented attack suggests a fundamental adversarial challenge for {AI} alignment, especially in light of the emerging trend toward multimodality in frontier foundation models.}, pages = {21527--21536}, number = {19}, journaltitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence}, shortjournal = {{AAAI}}, author = {Qi, Xiangyu and Huang, Kaixuan and Panda, Ashwinee and Henderson, Peter and Wang, Mengdi and Mittal, Prateek}, urldate = {2025-01-26}, date = {2024-03-24}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\3C9HJG8B\\Qi et al. - 2024 - Visual Adversarial Examples Jailbreak Aligned Large Language Models.pdf:application/pdf}, } @inproceedings{roy_probing_2023, location = {Singapore}, title = {Probing {LLMs} for hate speech detection: strengths and vulnerabilities}, url = {https://aclanthology.org/2023.findings-emnlp.407}, doi = {10.18653/v1/2023.findings-emnlp.407}, shorttitle = {Probing {LLMs} for hate speech detection}, eventtitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2023}, pages = {6116--6128}, booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2023}, publisher = {Association for Computational Linguistics}, author = {Roy, Sarthak and Harshvardhan, Ashish and Mukherjee, Animesh and Saha, Punyajoy}, urldate = {2025-01-26}, date = {2023}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\YJSEI5B3\\Roy et al. - 2023 - Probing LLMs for hate speech detection strengths and vulnerabilities.pdf:application/pdf}, } @misc{urman_silence_2023, title = {The Silence of the {LLMs}: Cross-Lingual Analysis of Political Bias and False Information Prevalence in {ChatGPT}, Google Bard, and Bing Chat}, rights = {https://creativecommons.org/licenses/by/4.0/legalcode}, url = {https://osf.io/q9v8f}, doi = {10.31219/osf.io/q9v8f}, shorttitle = {The Silence of the {LLMs}}, abstract = {This article presents a comparative analysis of political bias in the outputs of three Large Language Model ({LLM})-based chatbots - {ChatGPT}, Bing Chat, and Bard - in response to political queries concerning the authoritarian regime in Russia. We investigate whether safeguards implemented in these chatbots contribute to the censorship of information that is viewed as harmful by the regime, in particular information about Vladimir Putin and the Russian war against Ukraine, and whether these safeguards enable the generation of false claims, in particular in relation to the regime's internal and external opponents. 
To detect whether {LLM} safeguards reiterate political bias, the article compares the outputs of prompts focusing on Putin's regime and the ones dealing with the Russian opposition and the {US} and Ukrainian politicians. It also examines whether the degree of bias varies depending on the language of the prompt and compares outputs concerning political personalities and issues across three languages: Russian, Ukrainian, and English. The results reveal significant disparities in how individual chatbots withhold politics-related information or produce false claims in relation to it. Notably, Bard consistently refused to respond to queries about Vladimir Putin in Russian, even when the relevant information was accessible via Google Search, and generally followed the censorship guidelines that, according to Yandex-related data leaks, were issued by the Russian authorities. In terms of false claims, we find substantial variation across languages with Ukrainian and Russian prompts generating false information more often and Bard being more prone to produce false claims in relation to Russian regime opponents (e.g., Navalny or Zelenskyy) than other chatbots. This research aims to stimulate further dialogue and research on developing safeguards against the misuse of {LLMs} outside of democratic environments.}, publisher = {Open Science Framework}, author = {Urman, Aleksandra and Makhortykh, Mykola}, urldate = {2025-01-26}, date = {2023-09-08}, file = {Versione inviata:C\:\\Users\\moles\\Zotero\\storage\\LXE3LLEE\\Urman e Makhortykh - 2023 - The Silence of the LLMs Cross-Lingual Analysis of Political Bias and False Information Prevalence i.pdf:application/pdf}, } @article{wu_harnessing_2024, title = {Harnessing Response Consistency for Superior {LLM} Performance: The Promise and Peril of Answer-Augmented Prompting}, volume = {13}, rights = {https://creativecommons.org/licenses/by/4.0/}, issn = {2079-9292}, url = {https://www.mdpi.com/2079-9292/13/23/4581}, doi = {10.3390/electronics13234581}, shorttitle = {Harnessing Response Consistency for Superior {LLM} Performance}, abstract = {This paper introduces Answer-Augmented Prompting ({AAP}), an innovative approach that leverages the Response Consistency of History of Dialogue ({HoD}) phenomenon in Large Language Models ({LLMs}). {AAP} not only achieves significantly superior performance enhancements compared to traditional augmentation methods but also exhibits a stronger potential for “jailbreaking”, allowing models to produce unsafe or misleading responses. By strategically modifying the {HoD}, {AAP} influences {LLM} performance in a dual manner: it promotes accuracy while amplifying risks associated with bypassing built-in safeguards. Our experiments demonstrate that {AAP} outperforms standard methods in both effectiveness and the ability to elicit harmful content. To address these risks, we propose comprehensive mitigation strategies for both {LLM} service providers and end-users. This research offers valuable insights into the implications of Response Consistency in {LLMs}, underscoring the promise and peril of this powerful capability.}, pages = {4581}, number = {23}, journaltitle = {Electronics}, shortjournal = {Electronics}, author = {Wu, Hua and Hong, Haotian and Sun, Li and Bai, Xiaojing and Pu, Mengyang}, urldate = {2025-01-26}, date = {2024-11-21}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\NEZK2W8R\\Wu et al. 
- 2024 - Harnessing Response Consistency for Superior LLM Performance The Promise and Peril of Answer-Augmen.pdf:application/pdf}, } @inproceedings{yang_censorship_2021, location = {Virtual Event Canada}, title = {Censorship of Online Encyclopedias: Implications for {NLP} Models}, isbn = {978-1-4503-8309-7}, url = {https://dl.acm.org/doi/10.1145/3442188.3445916}, doi = {10.1145/3442188.3445916}, shorttitle = {Censorship of Online Encyclopedias}, eventtitle = {{FAccT} '21: 2021 {ACM} Conference on Fairness, Accountability, and Transparency}, pages = {537--548}, booktitle = {Proceedings of the 2021 {ACM} Conference on Fairness, Accountability, and Transparency}, publisher = {{ACM}}, author = {Yang, Eddie and Roberts, Margaret E.}, urldate = {2025-01-26}, date = {2021-03-03}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\U7UGUHB5\\Yang e Roberts - 2021 - Censorship of Online Encyclopedias Implications for NLP Models.pdf:application/pdf}, } @misc{lin_malla_2024, title = {Malla: Demystifying Real-world Large Language Model Integrated Malicious Services}, rights = {{arXiv}.org perpetual, non-exclusive license}, url = {https://arxiv.org/abs/2401.03315}, doi = {10.48550/ARXIV.2401.03315}, shorttitle = {Malla}, abstract = {The underground exploitation of large language models ({LLMs}) for malicious services (i.e., Malla) is witnessing an uptick, amplifying the cyber threat landscape and posing questions about the trustworthiness of {LLM} technologies. However, there has been little effort to understand this new cybercrime, in terms of its magnitude, impact, and techniques. In this paper, we conduct the first systematic study on 212 real-world Mallas, uncovering their proliferation in underground marketplaces and exposing their operational modalities. Our study discloses the Malla ecosystem, revealing its significant growth and impact on today's public {LLM} services. Through examining 212 Mallas, we uncovered eight backend {LLMs} used by Mallas, along with 182 prompts that circumvent the protective measures of public {LLM} {APIs}. We further demystify the tactics employed by Mallas, including the abuse of uncensored {LLMs} and the exploitation of public {LLM} {APIs} through jailbreak prompts. Our findings enable a better understanding of the real-world exploitation of {LLMs} by cybercriminals, offering insights into strategies to counteract this cybercrime.}, publisher = {{arXiv}}, author = {Lin, Zilong and Cui, Jian and Liao, Xiaojing and Wang, {XiaoFeng}}, urldate = {2025-01-26}, date = {2024}, note = {Version Number: 3}, keywords = {{FOS}: Computer and information sciences, Artificial Intelligence (cs.{AI}), Cryptography and Security (cs.{CR})}, } @article{ayana_decolonizing_2024, title = {Decolonizing global {AI} governance: assessment of the state of decolonized {AI} governance in Sub-Saharan Africa}, volume = {11}, issn = {2054-5703}, url = {https://royalsocietypublishing.org/doi/10.1098/rsos.231994}, doi = {10.1098/rsos.231994}, shorttitle = {Decolonizing global {AI} governance}, abstract = {Global artificial intelligence ({AI}) governance must prioritize equity, embrace a decolonial mindset, and provide the Global South countries the authority to spearhead solution creation. Decolonization is crucial for dismantling Western-centric cognitive frameworks and mitigating biases. 
Integrating a decolonial approach to {AI} governance involves recognizing persistent colonial repercussions, leading to biases in {AI} solutions and disparities in {AI} access based on gender, race, geography, income and societal factors. This paradigm shift necessitates deliberate efforts to deconstruct imperial structures governing knowledge production, perpetuating global unequal resource access and biases. This research evaluates Sub-Saharan African progress in {AI} governance decolonization, focusing on indicators like {AI} governance institutions, national strategies, sovereignty prioritization, data protection regulations, and adherence to local data usage requirements. Results show limited progress, with only Rwanda notably responsive to decolonization among the ten countries evaluated; 80\% are ‘decolonization-aware’, and one is ‘decolonization-blind’. The paper provides a detailed analysis of each nation, offering recommendations for fostering decolonization, including stakeholder involvement, addressing inequalities, promoting ethical {AI}, supporting local innovation, building regional partnerships, capacity building, public awareness, and inclusive governance. This paper contributes to elucidating the challenges and opportunities associated with decolonization in {SSA} countries, thereby enriching the ongoing discourse on global {AI} governance.}, pages = {231994}, number = {8}, journaltitle = {Royal Society Open Science}, shortjournal = {R. Soc. Open Sci.}, author = {Ayana, Gelan and Dese, Kokeb and Daba Nemomssa, Hundessa and Habtamu, Bontu and Mellado, Bruce and Badu, Kingsley and Yamba, Edmund and Faye, Sylvain Landry and Ondua, Moise and Nsagha, Dickson and Nkweteyim, Denis and Kong, Jude Dzevela}, urldate = {2025-01-26}, date = {2024-08}, langid = {english}, } @misc{capraro_impact_2023, title = {The impact of generative artificial intelligence on socioeconomic inequalities and policy making}, rights = {https://creativecommons.org/licenses/by/4.0/legalcode}, url = {https://osf.io/6fd2y}, doi = {10.31234/osf.io/6fd2y}, abstract = {Generative artificial intelligence has the potential to both exacerbate and ameliorate existing socioeconomic inequalities. In this article, we provide a state-of-the-art interdisciplinary overview of the potential impacts of generative {AI} on (mis)information and three information-intensive domains: work, education, and healthcare. Our goal is to highlight how generative {AI} could worsen existing inequalities while illuminating how {AI} may help mitigate pervasive social problems. In the information domain, generative {AI} can democratize content creation and access, but may dramatically expand the production and proliferation of misinformation. In the workplace, it can boost productivity and create new jobs, but the benefits will likely be distributed unevenly. In education, it offers personalized learning, but may widen the digital divide. In healthcare, it might improve diagnostics and accessibility, but could deepen pre-existing inequalities. In each section we cover a specific topic, evaluate existing research, identify critical gaps, and recommend research directions, including explicit trade-offs that complicate the derivation of a priori hypotheses. We conclude with a section highlighting the role of policymaking to maximize generative {AI}’s potential to reduce inequalities while mitigating its harmful effects. 
We discuss strengths and weaknesses of existing policy frameworks in the European Union, the United States, and the United Kingdom, observing that each fails to fully confront the socioeconomic challenges we have identified. We propose several concrete policies that could promote shared prosperity through the advancement of generative {AI}. This article emphasizes the need for interdisciplinary collaborations to understand and address the complex challenges of generative {AI}.}, publisher = {{PsyArXiv}}, author = {Capraro, Valerio and Lentsch, Austin and Acemoglu, Daron and Akgun, Selin and Akhmedova, Aisel and Bilancini, Ennio and Bonnefon, Jean-François and Branas-Garza, Pablo and Butera, Luigi and Douglas, Karen and Everett, Jim Albert Charlton and Gigerenzer, Gerd and Greenhow, Christine and Hashimoto, Daniel and Holt-Lunstad, Julianne and Jetten, Jolanda and Johnson, Simon and Kunz, Werner and Longoni, Chiara and Lunn, Peter D and Natale, Simone and Paluch, Stefanie and Rahwan, Iyad and Selwyn, Neil and Singh, Vivek and Suri, Siddharth and Sutcliffe, Jennifer and Tomlinson, Joe and Linden, Sander Van Der and Van Lange, Paul and Wall, Friederike and Van Bavel, Jay Joseph and Viale, Riccardo}, urldate = {2025-01-26}, date = {2023-12-16}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\V5ED93H7\\Capraro et al. - 2023 - The impact of generative artificial intelligence on socioeconomic inequalities and policy making.pdf:application/pdf}, } @article{onyebuchi_nneamaka_chisom_review_2024, title = {{REVIEW} {OF} {AI} {IN} {EDUCATION}: {TRANSFORMING} {LEARNING} {ENVIRONMENTS} {IN} {AFRICA}}, volume = {5}, rights = {https://creativecommons.org/licenses/by-nc/4.0}, issn = {2706-9184, 2706-9176}, url = {https://fepbl.com/index.php/ijarss/article/view/725}, doi = {10.51594/ijarss.v5i10.725}, shorttitle = {{REVIEW} {OF} {AI} {IN} {EDUCATION}}, abstract = {This study analyses artificial intelligence ({AI}'s) impact on education in Africa, focusing on personalized learning, technology integration, and challenges in educational development. This review explores the transformative role of Artificial Intelligence ({AI}) in reshaping educational landscapes across Africa. As the continent strives for inclusive and quality education, {AI} emerges as a potent tool with the potential to address educational challenges, enhance learning outcomes, and bridge existing gaps. The review delves into various applications of {AI} in education, ranging from personalized learning experiences to adaptive assessment methodologies, and examines their impact on diverse learning environments. It gives an overview of the current state of education in Africa, the review highlights the disparities in access, quality, and infrastructure. It also investigates the innovative ways in which {AI} technologies are being integrated into educational systems. {AI}-powered adaptive learning platforms, virtual tutors, and intelligent content delivery systems are analyzed for their effectiveness in catering to the diverse needs of students across the continent. The review also addresses the potential of {AI} in overcoming language barriers, promoting literacy, and fostering digital skills development. Moreover, it explores the role of {AI} in facilitating teacher support, professional development, and administrative tasks, thereby contributing to the overall improvement of the education ecosystem. 
Ethical considerations, privacy concerns, and the digital divide are critically examined to ensure that the integration of {AI} in education aligns with ethical standards and promotes equitable access. Case studies and pilot projects from various African countries are presented to illustrate successful implementations, challenges faced, and lessons learned. Furthermore, the review discusses the importance of collaborative efforts involving governments, educational institutions, technology developers, and the private sector. Policy recommendations and strategic initiatives are explored to guide the responsible and sustainable integration of {AI} in education across the diverse socio-economic and cultural contexts prevalent in Africa. In conclusion, the review synthesizes the current state of {AI} in education in Africa, offering insights into its potential to revolutionize learning environments. The transformative power of {AI} in addressing educational challenges and fostering a culture of continuous improvement is underscored, paving the way for a more inclusive, accessible, and innovative education landscape in the African context. Keywords: Artificial Intelligence, Education, Transform Learning, Environments, Africa.}, pages = {637--654}, number = {10}, journaltitle = {International Journal of Applied Research in Social Sciences}, shortjournal = {Int. j. appl. res. soc. sci.}, author = {{Onyebuchi Nneamaka Chisom} and {Chika Chioma Unachukwu} and {Blessing Osawaru}}, urldate = {2025-01-26}, date = {2024-01-15}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\KNGK2Z25\\Onyebuchi Nneamaka Chisom et al. - 2024 - REVIEW OF AI IN EDUCATION TRANSFORMING LEARNING ENVIRONMENTS IN AFRICA.pdf:application/pdf}, } @misc{deepseek-ai_deepseek-v2_2024, title = {{DeepSeek}-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model}, rights = {{arXiv}.org perpetual, non-exclusive license}, url = {https://arxiv.org/abs/2405.04434}, doi = {10.48550/ARXIV.2405.04434}, shorttitle = {{DeepSeek}-V2}, abstract = {We present {DeepSeek}-V2, a strong Mixture-of-Experts ({MoE}) language model characterized by economical training and efficient inference. It comprises 236B total parameters, of which 21B are activated for each token, and supports a context length of 128K tokens. {DeepSeek}-V2 adopts innovative architectures including Multi-head Latent Attention ({MLA}) and {DeepSeekMoE}. {MLA} guarantees efficient inference through significantly compressing the Key-Value ({KV}) cache into a latent vector, while {DeepSeekMoE} enables training strong models at an economical cost through sparse computation. Compared with {DeepSeek} 67B, {DeepSeek}-V2 achieves significantly stronger performance, and meanwhile saves 42.5\% of training costs, reduces the {KV} cache by 93.3\%, and boosts the maximum generation throughput to 5.76 times. We pretrain {DeepSeek}-V2 on a high-quality and multi-source corpus consisting of 8.1T tokens, and further perform Supervised Fine-Tuning ({SFT}) and Reinforcement Learning ({RL}) to fully unlock its potential. 
Evaluation results show that, even with only 21B activated parameters, {DeepSeek}-V2 and its chat versions still achieve top-tier performance among open-source models.}, publisher = {{arXiv}}, author = {{DeepSeek-AI} and Liu, Aixin and Feng, Bei and Wang, Bin and Wang, Bingxuan and Liu, Bo and Zhao, Chenggang and Dengr, Chengqi and Ruan, Chong and Dai, Damai and Guo, Daya and Yang, Dejian and Chen, Deli and Ji, Dongjie and Li, Erhang and Lin, Fangyun and Luo, Fuli and Hao, Guangbo and Chen, Guanting and Li, Guowei and Zhang, H. and Xu, Hanwei and Yang, Hao and Zhang, Haowei and Ding, Honghui and Xin, Huajian and Gao, Huazuo and Li, Hui and Qu, Hui and Cai, J. L. and Liang, Jian and Guo, Jianzhong and Ni, Jiaqi and Li, Jiashi and Chen, Jin and Yuan, Jingyang and Qiu, Junjie and Song, Junxiao and Dong, Kai and Gao, Kaige and Guan, Kang and Wang, Lean and Zhang, Lecong and Xu, Lei and Xia, Leyi and Zhao, Liang and Zhang, Liyue and Li, Meng and Wang, Miaojun and Zhang, Mingchuan and Zhang, Minghua and Tang, Minghui and Li, Mingming and Tian, Ning and Huang, Panpan and Wang, Peiyi and Zhang, Peng and Zhu, Qihao and Chen, Qinyu and Du, Qiushi and Chen, R. J. and Jin, R. L. and Ge, Ruiqi and Pan, Ruizhe and Xu, Runxin and Chen, Ruyi and Li, S. S. and Lu, Shanghao and Zhou, Shangyan and Chen, Shanhuang and Wu, Shaoqing and Ye, Shengfeng and Ma, Shirong and Wang, Shiyu and Zhou, Shuang and Yu, Shuiping and Zhou, Shunfeng and Zheng, Size and Wang, T. and Pei, Tian and Yuan, Tian and Sun, Tianyu and Xiao, W. L. and Zeng, Wangding and An, Wei and Liu, Wen and Liang, Wenfeng and Gao, Wenjun and Zhang, Wentao and Li, X. Q. and Jin, Xiangyue and Wang, Xianzu and Bi, Xiao and Liu, Xiaodong and Wang, Xiaohan and Shen, Xiaojin and Chen, Xiaokang and Chen, Xiaosha and Nie, Xiaotao and Sun, Xiaowen and Wang, Xiaoxiang and Liu, Xin and Xie, Xin and Yu, Xingkai and Song, Xinnan and Zhou, Xinyi and Yang, Xinyu and Lu, Xuan and Su, Xuecheng and Wu, Y. and Li, Y. K. and Wei, Y. X. and Zhu, Y. X. and Xu, Yanhong and Huang, Yanping and Li, Yao and Zhao, Yao and Sun, Yaofeng and Li, Yaohui and Wang, Yaohui and Zheng, Yi and Zhang, Yichao and Xiong, Yiliang and Zhao, Yilong and He, Ying and Tang, Ying and Piao, Yishi and Dong, Yixin and Tan, Yixuan and Liu, Yiyuan and Wang, Yongji and Guo, Yongqiang and Zhu, Yuchen and Wang, Yuduan and Zou, Yuheng and Zha, Yukun and Ma, Yunxian and Yan, Yuting and You, Yuxiang and Liu, Yuxuan and Ren, Z. Z. and Ren, Zehui and Sha, Zhangli and Fu, Zhe and Huang, Zhen and Zhang, Zhen and Xie, Zhenda and Hao, Zhewen and Shao, Zhihong and Wen, Zhiniu and Xu, Zhipeng and Zhang, Zhongyu and Li, Zhuoshu and Wang, Zihan and Gu, Zihui and Li, Zilin and Xie, Ziwei}, urldate = {2025-01-26}, date = {2024}, note = {Version Number: 5}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Artificial Intelligence (cs.{AI})}, } @article{chima_abimbola_edeni_role_2024, title = {The role of {AI}-enhanced tools in overcoming socioeconomic barriers in education: A conceptual analysis}, volume = {21}, issn = {25819615}, url = {https://wjarr.com/content/role-ai-enhanced-tools-overcoming-socioeconomic-barriers-education-conceptual-analysis}, doi = {10.30574/wjarr.2024.21.3.0780}, shorttitle = {The role of {AI}-enhanced tools in overcoming socioeconomic barriers in education}, abstract = {This conceptual analysis explores the transformative potential of {AI}-enhanced tools in addressing socioeconomic barriers within the educational landscape. 
By leveraging artificial intelligence ({AI}) technologies, the paper aims to examine how such tools can mitigate disparities arising from economic, social, and cultural factors. Through a critical analysis, it seeks to elucidate the role of {AI} in promoting equitable access, enhancing learning outcomes, and fostering inclusivity in education. The executive summary encapsulates the essence of the conceptual analysis. It provides a concise overview of the paper's objectives, methodology, expected outcomes, and implications. In recent years, the intersection of artificial intelligence ({AI}) and education has garnered significant attention as a potential solution to address persistent socioeconomic barriers within the educational landscape. The executive summary outlines the imperative to explore how {AI}-enhanced tools can serve as transformative agents in mitigating disparities arising from economic, social, and cultural factors. By leveraging {AI} technologies, educators and policymakers have the opportunity to revolutionize traditional educational practices and foster more inclusive learning environments. The summary highlights the urgent need to examine the role of {AI} in promoting equitable access, enhancing learning outcomes, and fostering inclusivity across diverse socioeconomic backgrounds. Through a critical analysis of existing literature, case studies, and empirical research, the conceptual analysis seeks to elucidate the potential of {AI} to bridge the digital divide and advance educational equity. It emphasizes the importance of identifying actionable strategies and best practices for leveraging {AI} technology to address systemic inequalities in education.}, pages = {944--951}, number = {3}, journaltitle = {World Journal of Advanced Research and Reviews}, shortjournal = {World J. Adv. Res. Rev.}, author = {{Chima Abimbola Edeni} and {Olabisi Oluwakemi Adeleye} and {Idowu Sulaimon Adeniyi}}, urldate = {2025-01-26}, date = {2024-03-30}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\RFS3R2UI\\Chima Abimbola Edeni et al. - 2024 - The role of AI-enhanced tools in overcoming socioeconomic barriers in education A conceptual analys.pdf:application/pdf}, } @article{li_ai_2023, title = {{AI} in Education: Bridging the Divide or Widening the Gap? Exploring Equity, Opportunities, and Challenges in the Digital Age}, volume = {8}, issn = {2790-167X}, url = {https://madison-proceedings.com/index.php/aehssr/article/view/1924}, doi = {10.56028/aehssr.8.1.355.2023}, shorttitle = {{AI} in Education}, abstract = {Artificial Intelligence ({AI}) stands as a pivotal technological advancement with profound societal implications. This paper delves into a comprehensive analysis of diverse articles and perspectives to scrutinize {AI}'s influence on educational inequality, particularly within the context of the Chinese education system. While prevailing literature often skims the surface, there's a burgeoning sentiment celebrating the human-{AI} synergy, often overlooking its potential to accentuate educational disparities. This research delves deeper, uncovering the intricate nexus between {AI}-driven education and human capital markets. The findings suggest that {AI}, while promising, might inadvertently perpetuate the same crises across different demographics, amplifying existing inequalities. The strong may become stronger, while the vulnerable risk further marginalization, primarily due to disparities in resource allocation. 
To mitigate these challenges, this paper proposes three actionable recommendations. Furthermore, recognizing the global implications of this issue, the study advocates for international collaboration to ensure equitable access to {AI}-related educational resources, championing the cause of educational fairness worldwide.}, pages = {355}, number = {1}, journaltitle = {Advances in Education, Humanities and Social Science Research}, shortjournal = {{AEHSSR}}, author = {Li, Haomin}, urldate = {2025-01-26}, date = {2023-12-06}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\4TCKBBUV\\Li - 2023 - AI in Education Bridging the Divide or Widening the Gap Exploring Equity, Opportunities, and Chall.pdf:application/pdf}, } @misc{qu_survey_2024, title = {A Survey of Mamba}, rights = {Creative Commons Attribution Non Commercial Share Alike 4.0 International}, url = {https://arxiv.org/abs/2408.01129}, doi = {10.48550/ARXIV.2408.01129}, abstract = {As one of the most representative {DL} techniques, Transformer architecture has empowered numerous advanced models, especially the large language models ({LLMs}) that comprise billions of parameters, becoming a cornerstone in deep learning. Despite the impressive achievements, Transformers still face inherent limitations, particularly the time-consuming inference resulting from the quadratic computation complexity of attention calculation. Recently, a novel architecture named Mamba, drawing inspiration from classical state space models ({SSMs}), has emerged as a promising alternative for building foundation models, delivering comparable modeling abilities to Transformers while preserving near-linear scalability concerning sequence length. This has sparked an increasing number of studies actively exploring Mamba's potential to achieve impressive performance across diverse domains. Given such rapid evolution, there is a critical need for a systematic review that consolidates existing Mamba-empowered models, offering a comprehensive understanding of this emerging model architecture. In this survey, we therefore conduct an in-depth investigation of recent Mamba-associated studies, covering three main aspects: the advancements of Mamba-based models, the techniques of adapting Mamba to diverse data, and the applications where Mamba can excel. Specifically, we first review the foundational knowledge of various representative deep learning models and the details of Mamba-1\&2 as preliminaries. Then, to showcase the significance of Mamba for {AI}, we comprehensively review the related studies focusing on Mamba models' architecture design, data adaptability, and applications. 
Finally, we present a discussion of current limitations and explore various promising research directions to provide deeper insights for future investigations.}, publisher = {{arXiv}}, author = {Qu, Haohao and Ning, Liangbo and An, Rui and Fan, Wenqi and Derr, Tyler and Liu, Hui and Xu, Xin and Li, Qing}, urldate = {2025-01-26}, date = {2024}, note = {Version Number: 5}, keywords = {{FOS}: Computer and information sciences, Machine Learning (cs.{LG}), Artificial Intelligence (cs.{AI})}, } @article{wilson_gpu_2022, title = {{GPU} Prices and Cryptocurrency Returns}, volume = {11}, rights = {http://creativecommons.org/licenses/by-nc-nd/4.0}, issn = {2253-5802, 2253-5799}, url = {https://ojs.aut.ac.nz/applied-finance-letters/article/view/503}, doi = {10.24135/afl.v11i.503}, abstract = {We look at the association between the price of a cryptocurrency and the secondary market prices of the hardware used to mine it. We find the prices of the most efficient Graphical Processing Units ({GPUs}) for Ethereum mining are significantly positively correlated with the daily price returns to that cryptocurrency.}, pages = {2--8}, journaltitle = {Applied Finance Letters}, shortjournal = {{AFL}}, author = {Wilson, Linus}, urldate = {2025-01-26}, date = {2022-03-06}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\H2HZHG58\\Wilson - 2022 - GPU PRICES AND CRYPTOCURRENCY RETURNS.pdf:application/pdf}, } @misc{xiao_large_2024, title = {Large Language Model Performance Benchmarking on Mobile Platforms: A Thorough Evaluation}, rights = {{arXiv}.org perpetual, non-exclusive license}, url = {https://arxiv.org/abs/2410.03613}, doi = {10.48550/ARXIV.2410.03613}, shorttitle = {Large Language Model Performance Benchmarking on Mobile Platforms}, abstract = {As large language models ({LLMs}) increasingly integrate into every aspect of our work and daily lives, there are growing concerns about user privacy, which push the trend toward local deployment of these models. There are a number of lightweight {LLMs} (e.g., Gemini Nano, {LLAMA}2 7B) that can run locally on smartphones, providing users with greater control over their personal data. As a rapidly emerging application, we are concerned about their performance on commercial-off-the-shelf mobile devices. To fully understand the current landscape of {LLM} deployment on mobile platforms, we conduct a comprehensive measurement study on mobile devices. We evaluate both metrics that affect user experience, including token throughput, latency, and battery consumption, as well as factors critical to developers, such as resource utilization, {DVFS} strategies, and inference engines. In addition, we provide a detailed analysis of how these hardware capabilities and system dynamics affect on-device {LLM} performance, which may help developers identify and address bottlenecks for mobile {LLM} applications. We also provide comprehensive comparisons across the mobile system-on-chips ({SoCs}) from major vendors, highlighting their performance differences in handling {LLM} workloads. 
We hope that this study can provide insights for both the development of on-device {LLMs} and the design for future mobile system architecture.}, publisher = {{arXiv}}, author = {Xiao, Jie and Huang, Qianyi and Chen, Xu and Tian, Chen}, urldate = {2025-01-26}, date = {2024}, note = {Version Number: 1}, keywords = {{FOS}: Computer and information sciences, Machine Learning (cs.{LG})}, } @misc{yong_low-resource_2023, title = {Low-Resource Languages Jailbreak {GPT}-4}, rights = {{arXiv}.org perpetual, non-exclusive license}, url = {https://arxiv.org/abs/2310.02446}, doi = {10.48550/ARXIV.2310.02446}, abstract = {{AI} safety training and red-teaming of large language models ({LLMs}) are measures to mitigate the generation of unsafe content. Our work exposes the inherent cross-lingual vulnerability of these safety mechanisms, resulting from the linguistic inequality of safety training data, by successfully circumventing {GPT}-4's safeguard through translating unsafe English inputs into low-resource languages. On the {AdvBenchmark}, {GPT}-4 engages with the unsafe translated inputs and provides actionable items that can get the users towards their harmful goals 79\% of the time, which is on par with or even surpassing state-of-the-art jailbreaking attacks. Other high-/mid-resource languages have significantly lower attack success rate, which suggests that the cross-lingual vulnerability mainly applies to low-resource languages. Previously, limited training on low-resource languages primarily affects speakers of those languages, causing technological disparities. However, our work highlights a crucial shift: this deficiency now poses a risk to all {LLMs} users. Publicly available translation {APIs} enable anyone to exploit {LLMs}' safety vulnerabilities. Therefore, our work calls for a more holistic red-teaming efforts to develop robust multilingual safeguards with wide language coverage.}, publisher = {{arXiv}}, author = {Yong, Zheng-Xin and Menghini, Cristina and Bach, Stephen H.}, urldate = {2025-01-26}, date = {2023}, note = {Version Number: 2}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG}), Artificial Intelligence (cs.{AI}), Cryptography and Security (cs.{CR})}, } @misc{zhong_opportunities_2024, title = {Opportunities and Challenges of Large Language Models for Low-Resource Languages in Humanities Research}, rights = {Creative Commons Attribution Non Commercial No Derivatives 4.0 International}, url = {https://arxiv.org/abs/2412.04497}, doi = {10.48550/ARXIV.2412.04497}, abstract = {Low-resource languages serve as invaluable repositories of human history, embodying cultural evolution and intellectual diversity. Despite their significance, these languages face critical challenges, including data scarcity and technological limitations, which hinder their comprehensive study and preservation. Recent advancements in large language models ({LLMs}) offer transformative opportunities for addressing these challenges, enabling innovative methodologies in linguistic, historical, and cultural research. This study systematically evaluates the applications of {LLMs} in low-resource language research, encompassing linguistic variation, historical documentation, cultural expressions, and literary analysis. By analyzing technical frameworks, current methodologies, and ethical considerations, this paper identifies key challenges such as data accessibility, model adaptability, and cultural sensitivity. 
Given the cultural, historical, and linguistic richness inherent in low-resource languages, this work emphasizes interdisciplinary collaboration and the development of customized models as promising avenues for advancing research in this domain. By underscoring the potential of integrating artificial intelligence with the humanities to preserve and study humanity's linguistic and cultural heritage, this study fosters global efforts towards safeguarding intellectual diversity.}, publisher = {{arXiv}}, author = {Zhong, Tianyang and Yang, Zhenyuan and Liu, Zhengliang and Zhang, Ruidong and Liu, Yiheng and Sun, Haiyang and Pan, Yi and Li, Yiwei and Zhou, Yifan and Jiang, Hanqi and Chen, Junhao and Liu, Tianming}, urldate = {2025-01-26}, date = {2024}, note = {Version Number: 2}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Artificial Intelligence (cs.{AI})}, } @article{walter_embracing_2024, title = {Embracing the future of Artificial Intelligence in the classroom: the relevance of {AI} literacy, prompt engineering, and critical thinking in modern education}, volume = {21}, issn = {2365-9440}, url = {https://educationaltechnologyjournal.springeropen.com/articles/10.1186/s41239-024-00448-3}, doi = {10.1186/s41239-024-00448-3}, shorttitle = {Embracing the future of Artificial Intelligence in the classroom}, abstract = {Abstract The present discussion examines the transformative impact of Artificial Intelligence ({AI}) in educational settings, focusing on the necessity for {AI} literacy, prompt engineering proficiency, and enhanced critical thinking skills. The introduction of {AI} into education marks a significant departure from conventional teaching methods, offering personalized learning and support for diverse educational requirements, including students with special needs. However, this integration presents challenges, including the need for comprehensive educator training and curriculum adaptation to align with societal structures. {AI} literacy is identified as crucial, encompassing an understanding of {AI} technologies and their broader societal impacts. Prompt engineering is highlighted as a key skill for eliciting specific responses from {AI} systems, thereby enriching educational experiences and promoting critical thinking. There is detailed analysis of strategies for embedding these skills within educational curricula and pedagogical practices. This is discussed through a case-study based on a Swiss university and a narrative literature review, followed by practical suggestions of how to implement {AI} in the classroom.}, pages = {15}, number = {1}, journaltitle = {International Journal of Educational Technology in Higher Education}, shortjournal = {Int J Educ Technol High Educ}, author = {Walter, Yoshija}, urldate = {2025-01-26}, date = {2024-02-26}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\N88NSYJK\\Walter - 2024 - Embracing the future of Artificial Intelligence in the classroom the relevance of AI literacy, prom.pdf:application/pdf}, } @article{bauchner_use_2024, title = {Use of artificial intelligence and the future of peer review}, volume = {2}, rights = {https://creativecommons.org/licenses/by-nc/4.0/}, issn = {2976-5390}, url = {https://academic.oup.com/healthaffairsscholar/article/doi/10.1093/haschl/qxae058/7663651}, doi = {10.1093/haschl/qxae058}, abstract = {Abstract Conducting high-quality peer review of scientific manuscripts has become increasingly challenging. 
The substantial increase in the number of manuscripts, lack of a sufficient number of peer-reviewers, and questions related to effectiveness, fairness, and efficiency, require a different approach. Large-language models, 1 form of artificial intelligence ({AI}), have emerged as a new approach to help resolve many of the issues facing contemporary medicine and science. We believe {AI} should be used to assist in the triaging of manuscripts submitted for peer-review publication.}, pages = {qxae058}, number = {5}, journaltitle = {Health Affairs Scholar}, author = {Bauchner, Howard and Rivara, Frederick P}, urldate = {2025-01-26}, date = {2024-05-03}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\FUIB5GET\\Bauchner e Rivara - 2024 - Use of artificial intelligence and the future of peer review.pdf:application/pdf}, } @article{noauthor_reviewing_2024, title = {Reviewing the performance of {AI} detection tools in differentiating between {AI}-generated and human-written texts: A literature and integrative hybrid review}, volume = {7}, issn = {2591-801X, 2591-801X}, url = {https://journals.sfu.ca/jalt/index.php/jalt/article/view/1369}, doi = {10.37074/jalt.2024.7.1.14}, shorttitle = {Reviewing the performance of {AI} detection tools in differentiating between {AI}-generated and human-written texts}, number = {1}, journaltitle = {Journal of Applied Learning \& Teaching}, shortjournal = {{JALT}}, urldate = {2025-01-26}, date = {2024-02-07}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\MEM8F9E7\\2024 - Reviewing the performance of AI detection tools in differentiating between AI-generated and human-wr.pdf:application/pdf}, } @misc{kirchenbauer_watermark_2023, title = {A Watermark for Large Language Models}, rights = {{arXiv}.org perpetual, non-exclusive license}, url = {https://arxiv.org/abs/2301.10226}, doi = {10.48550/ARXIV.2301.10226}, abstract = {Potential harms of large language models can be mitigated by watermarking model output, i.e., embedding signals into generated text that are invisible to humans but algorithmically detectable from a short span of tokens. We propose a watermarking framework for proprietary language models. The watermark can be embedded with negligible impact on text quality, and can be detected using an efficient open-source algorithm without access to the language model {API} or parameters. The watermark works by selecting a randomized set of "green" tokens before a word is generated, and then softly promoting use of green tokens during sampling. We propose a statistical test for detecting the watermark with interpretable p-values, and derive an information-theoretic framework for analyzing the sensitivity of the watermark. 
We test the watermark using a multi-billion parameter model from the Open Pretrained Transformer ({OPT}) family, and discuss robustness and security.}, publisher = {{arXiv}}, author = {Kirchenbauer, John and Geiping, Jonas and Wen, Yuxin and Katz, Jonathan and Miers, Ian and Goldstein, Tom}, urldate = {2025-01-26}, date = {2023}, note = {Version Number: 4}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG}), Cryptography and Security (cs.{CR})}, } @article{liang_gpt_2023, title = {{GPT} detectors are biased against non-native English writers}, volume = {4}, issn = {26663899}, url = {https://linkinghub.elsevier.com/retrieve/pii/S2666389923001307}, doi = {10.1016/j.patter.2023.100779}, pages = {100779}, number = {7}, journaltitle = {Patterns}, shortjournal = {Patterns}, author = {Liang, Weixin and Yuksekgonul, Mert and Mao, Yining and Wu, Eric and Zou, James}, urldate = {2025-01-26}, date = {2023-07}, langid = {english}, file = {PubMed Central Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\NWMX9I5P\\Liang et al. - 2023 - GPT detectors are biased against non-native English writers.pdf:application/pdf}, } @misc{lu_ai_2024, title = {The {AI} Scientist: Towards Fully Automated Open-Ended Scientific Discovery}, rights = {Creative Commons Attribution 4.0 International}, url = {https://arxiv.org/abs/2408.06292}, doi = {10.48550/ARXIV.2408.06292}, shorttitle = {The {AI} Scientist}, abstract = {One of the grand challenges of artificial general intelligence is developing agents capable of conducting scientific research and discovering new knowledge. While frontier models have already been used as aides to human scientists, e.g. for brainstorming ideas, writing code, or prediction tasks, they still conduct only a small part of the scientific process. This paper presents the first comprehensive framework for fully automatic scientific discovery, enabling frontier large language models to perform research independently and communicate their findings. We introduce The {AI} Scientist, which generates novel research ideas, writes code, executes experiments, visualizes results, describes its findings by writing a full scientific paper, and then runs a simulated review process for evaluation. In principle, this process can be repeated to iteratively develop ideas in an open-ended fashion, acting like the human scientific community. We demonstrate its versatility by applying it to three distinct subfields of machine learning: diffusion modeling, transformer-based language modeling, and learning dynamics. Each idea is implemented and developed into a full paper at a cost of less than \$15 per paper. To evaluate the generated papers, we design and validate an automated reviewer, which we show achieves near-human performance in evaluating paper scores. The {AI} Scientist can produce papers that exceed the acceptance threshold at a top machine learning conference as judged by our automated reviewer. This approach signifies the beginning of a new era in scientific discovery in machine learning: bringing the transformative benefits of {AI} agents to the entire research process of {AI} itself, and taking us closer to a world where endless affordable creativity and innovation can be unleashed on the world's most challenging problems. 
Our code is open-sourced at https://github.com/{SakanaAI}/{AI}-Scientist}, publisher = {{arXiv}}, author = {Lu, Chris and Lu, Cong and Lange, Robert Tjarko and Foerster, Jakob and Clune, Jeff and Ha, David}, urldate = {2025-01-26}, date = {2024}, note = {Version Number: 3}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG}), Artificial Intelligence (cs.{AI})}, } @misc{sadasivan_can_2023, title = {Can {AI}-Generated Text be Reliably Detected?}, rights = {Creative Commons Attribution 4.0 International}, url = {https://arxiv.org/abs/2303.11156}, doi = {10.48550/ARXIV.2303.11156}, abstract = {Large Language Models ({LLMs}) perform impressively well in various applications. However, the potential for misuse of these models in activities such as plagiarism, generating fake news, and spamming has raised concern about their responsible use. Consequently, the reliable detection of {AI}-generated text has become a critical area of research. {AI} text detectors have shown to be effective under their specific settings. In this paper, we stress-test the robustness of these {AI} text detectors in the presence of an attacker. We introduce recursive paraphrasing attack to stress test a wide range of detection schemes, including the ones using the watermarking as well as neural network-based detectors, zero shot classifiers, and retrieval-based detectors. Our experiments conducted on passages, each approximately 300 tokens long, reveal the varying sensitivities of these detectors to our attacks. Our findings indicate that while our recursive paraphrasing method can significantly reduce detection rates, it only slightly degrades text quality in many cases, highlighting potential vulnerabilities in current detection systems in the presence of an attacker. Additionally, we investigate the susceptibility of watermarked {LLMs} to spoofing attacks aimed at misclassifying human-written text as {AI}-generated. We demonstrate that an attacker can infer hidden {AI} text signatures without white-box access to the detection method, potentially leading to reputational risks for {LLM} developers. Finally, we provide a theoretical framework connecting the {AUROC} of the best possible detector to the Total Variation distance between human and {AI} text distributions. This analysis offers insights into the fundamental challenges of reliable detection as language models continue to advance. Our code is publicly available at https://github.com/vinusankars/Reliability-of-{AI}-text-detectors.}, publisher = {{arXiv}}, author = {Sadasivan, Vinu Sankar and Kumar, Aounon and Balasubramanian, Sriram and Wang, Wenxiao and Feizi, Soheil}, urldate = {2025-01-26}, date = {2023}, note = {Version Number: 4}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG}), Artificial Intelligence (cs.{AI})}, } @misc{yakura_empirical_2024, title = {Empirical evidence of Large Language Model's influence on human spoken communication}, rights = {{arXiv}.org perpetual, non-exclusive license}, url = {https://arxiv.org/abs/2409.01754}, doi = {10.48550/ARXIV.2409.01754}, abstract = {Artificial Intelligence ({AI}) agents now interact with billions of humans in natural language, thanks to advances in Large Language Models ({LLMs}) like {ChatGPT}. This raises the question of whether {AI} has the potential to shape a fundamental aspect of human culture: the way we speak. 
Recent analyses revealed that scientific publications already exhibit evidence of {AI}-specific language. But this evidence is inconclusive, since scientists may simply be using {AI} to copy-edit their writing. To explore whether {AI} has influenced human spoken communication, we transcribed and analyzed about 280,000 English-language videos of presentations, talks, and speeches from more than 20,000 {YouTube} channels of academic institutions. We find a significant shift in the trend of word usage specific to words distinctively associated with {ChatGPT} following its release. These findings provide the first empirical evidence that humans increasingly imitate {LLMs} in their spoken language. Our results raise societal and policy-relevant concerns about the potential of {AI} to unintentionally reduce linguistic diversity, or to be deliberately misused for mass manipulation. They also highlight the need for further investigation into the feedback loops between machine behavior and human culture.}, publisher = {{arXiv}}, author = {Yakura, Hiromu and Lopez-Lopez, Ezequiel and Brinkmann, Levin and Serna, Ignacio and Gupta, Prateek and Rahwan, Iyad}, urldate = {2025-01-26}, date = {2024}, note = {Version Number: 1}, keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Artificial Intelligence (cs.{AI}), Computers and Society (cs.{CY}), Human-Computer Interaction (cs.{HC})}, } @article{hopfenbeck_challenges_2023, title = {Challenges and opportunities for classroom-based formative assessment and {AI}: a perspective article}, volume = {8}, issn = {2504-284X}, url = {https://www.frontiersin.org/articles/10.3389/feduc.2023.1270700/full}, doi = {10.3389/feduc.2023.1270700}, shorttitle = {Challenges and opportunities for classroom-based formative assessment and {AI}}, abstract = {The integration of artificial intelligence ({AI}) into educational contexts may give rise to both positive and negative ramifications for teachers’ uses of formative assessment within their classrooms. Drawing on our diverse experiences as academics, researchers, psychometricians, teachers, and teacher educators specializing in formative assessment, we examine the pedagogical practices in which teachers provide feedback, facilitate peer- and self-assessments, and support students’ learning, and discuss how existing challenges to each of these may be affected by applications of {AI}. Firstly, we overview the challenges in the practice of formative assessment independently of the influence of {AI}. Moreover, based on the authors’ varied experience in formative assessment, we discuss the opportunities that {AI} brings to address the challenges in formative assessment as well as the new challenges introduced by the application of {AI} in formative assessment. Finally, we argue for the ongoing importance of self-regulated learning and a renewed emphasis on critical thinking for more effective implementation of formative assessment in this new {AI}-driven digital age.}, pages = {1270700}, journaltitle = {Frontiers in Education}, shortjournal = {Front. Educ.}, author = {Hopfenbeck, Therese N. and Zhang, Zhonghua and Sun, Sundance Zhihong and Robertson, Pam and {McGrane}, Joshua A.}, urldate = {2025-01-26}, date = {2023-11-23}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\DM6GKV59\\Hopfenbeck et al. 
- 2023 - Challenges and opportunities for classroom-based formative assessment and AI a perspective article.pdf:application/pdf}, } @article{nicol_power_2021, title = {The power of internal feedback: exploiting natural comparison processes}, volume = {46}, issn = {0260-2938, 1469-297X}, url = {https://www.tandfonline.com/doi/full/10.1080/02602938.2020.1823314}, doi = {10.1080/02602938.2020.1823314}, shorttitle = {The power of internal feedback}, pages = {756--778}, number = {5}, journaltitle = {Assessment \& Evaluation in Higher Education}, shortjournal = {Assessment \& Evaluation in Higher Education}, author = {Nicol, David}, urldate = {2025-01-26}, date = {2021-07-04}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\RA3UPS42\\Nicol - 2021 - The power of internal feedback exploiting natural comparison processes.pdf:application/pdf}, } @article{nicol_making_2022, title = {Making internal feedback explicit: harnessing the comparisons students make during two-stage exams}, volume = {47}, issn = {0260-2938, 1469-297X}, url = {https://www.tandfonline.com/doi/full/10.1080/02602938.2021.1934653}, doi = {10.1080/02602938.2021.1934653}, shorttitle = {Making internal feedback explicit}, pages = {507--522}, number = {4}, journaltitle = {Assessment \& Evaluation in Higher Education}, shortjournal = {Assessment \& Evaluation in Higher Education}, author = {Nicol, David and Selvaretnam, Geethanjali}, urldate = {2025-01-26}, date = {2022-05-19}, langid = {english}, file = {Versione accettata:C\:\\Users\\moles\\Zotero\\storage\\7DMRI25L\\Nicol e Selvaretnam - 2022 - Making internal feedback explicit harnessing the comparisons students make during two-stage exams.pdf:application/pdf}, } @article{r_ai-driven_2024, title = {{AI}-Driven Flipped Classroom: Revolutionizing Education Through Digital Pedagogy}, volume = {7}, rights = {https://creativecommons.org/licenses/by-nc-nd/4.0}, issn = {2682-6704}, url = {https://abjournals.org/bjeldp/papers/volume-7/issue-2/ai-driven-flipped-classroom-revolutionizing-education-through-digital-pedagogy/}, doi = {10.52589/BJELDP-LTDJFLIH}, shorttitle = {{AI}-Driven Flipped Classroom}, abstract = {The integration of artificial intelligence ({AI}) into the flipped classroom model is the subject of this research paper. With the flipped classroom approach, traditional teaching methods are reversed, with instructional content being delivered outside of class and class time being devoted to discussions, activities, and problem-solving. Teachers want to give students a personalized learning experience, and they do this by implementing {AI} technologies like intelligent tutoring systems, virtual tutors, and adaptive learning platforms. This study uses existing research and empirical studies to analyse the effects, advantages, difficulties, and efficacy of using {AI} in flipped classrooms. The study explores the use of {AI} in flipped classrooms, highlighting its potential benefits like improved learning outcomes and scalability. However, it also addresses challenges like technology infrastructure, teacher preparation, privacy, and equity, as well as potential drawbacks.}, pages = {169--179}, number = {2}, journaltitle = {British Journal of Education, Learning and Development Psychology}, shortjournal = {British Journal of Education, Learning and Development Psychology}, author = {R., Suvendu and P. S., Deb}, urldate = {2025-01-26}, date = {2024-06-24}, langid = {english}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\HQ8Y8QWD\\R. e P. S. 
- 2024 - AI-Driven Flipped Classroom Revolutionizing Education Through Digital Pedagogy.pdf:application/pdf}, } @article{nurjanah_artificial_2024, title = {Artificial Intelligence ({AI}) Usage In Today’s Teaching And Learning Process: A Review}, volume = {6}, rights = {http://creativecommons.org/licenses/by-sa/4.0}, issn = {2684-883X, 2684-6853}, url = {https://jurnal.syntax-idea.co.id/index.php/syntax-idea/article/view/3126}, doi = {10.46799/syntax-idea.v6i3.3126}, shorttitle = {Artificial Intelligence ({AI}) Usage In Today’s Teaching And Learning Process}, abstract = {In today's technology world, the integration of artificial intelligence ({AI}) has become increasingly prominent in education, with enormous potential to improve the teaching and learning experience. {AI}, defined by its ability to imitate human intelligence, possesses enormous power and has the potential to dramatically impact a variety of areas, most notably education. {AI} has significantly improved learning experiences for both teachers and students by allowing them to be customized and personalized. This review article investigates the prospects provided by {AI} in modern teaching and learning processes, with a special emphasis on its advantages in language learning. This study examines existing literature and studies on {AI} in education, with a focus on language learning environments. The results show {AI}'s advantages in giving targeted feedback and practice opportunities, making language learning easier, and improving overall learning efficiency and effectiveness. Thus, this review contributes to a better understanding of {AI}'s role in redefining present educational paradigms, as well as its potential to transform teaching and learning methodologies.}, pages = {1517--1523}, number = {3}, journaltitle = {Syntax Idea}, shortjournal = {{SLJIL}}, author = {Nurjanah, Aisyah and Salsabila, Irma Nuraeni and Azzahra, Adelia and Rahayu, Riska and Marlina, Nina}, urldate = {2025-01-26}, date = {2024-04-05}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\RT56VISE\\Nurjanah et al. 
- 2024 - Artificial Intelligence (AI) Usage In Today’s Teaching And Learning Process A Review.pdf:application/pdf}, } @article{chen_artificial_2020, title = {Artificial Intelligence in Education: A Review}, volume = {8}, rights = {https://creativecommons.org/licenses/by/4.0/legalcode}, issn = {2169-3536}, url = {https://ieeexplore.ieee.org/document/9069875/}, doi = {10.1109/ACCESS.2020.2988510}, shorttitle = {Artificial Intelligence in Education}, pages = {75264--75278}, journaltitle = {{IEEE} Access}, shortjournal = {{IEEE} Access}, author = {Chen, Lijia and Chen, Pingping and Lin, Zhijian}, urldate = {2025-01-26}, date = {2020}, } @article{cho_student_2010, title = {Student revision with peer and expert reviewing}, volume = {20}, rights = {https://www.elsevier.com/tdm/userlicense/1.0/}, issn = {09594752}, url = {https://linkinghub.elsevier.com/retrieve/pii/S0959475209000747}, doi = {10.1016/j.learninstruc.2009.08.006}, pages = {328--338}, number = {4}, journaltitle = {Learning and Instruction}, shortjournal = {Learning and Instruction}, author = {Cho, Kwangsu and {MacArthur}, Charles}, urldate = {2025-01-26}, date = {2010-08}, langid = {english}, } @online{noauthor_llama_nodate, title = {Llama 2 - Acceptable Use Policy - Meta {AI}}, url = {https://ai.meta.com/llama-project/use-policy}, urldate = {2025-01-26}, langid = {english}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\5XK3V7TF\\use-policy.html:text/html}, } @online{noauthor_meta_nodate, title = {Meta Llama 3 License}, url = {https://www.llama.com/llama3/license/}, abstract = {.}, titleaddon = {Llama}, urldate = {2025-01-26}, langid = {english}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\TDA2N3IQ\\license.html:text/html}, } @misc{murray_generative_2023, location = {Rochester, {NY}}, title = {Generative {AI} Art: Copyright Infringement and Fair Use}, url = {https://papers.ssrn.com/abstract=4483539}, doi = {10.2139/ssrn.4483539}, shorttitle = {Generative {AI} Art}, abstract = {Generative {AI} Art: Copyright Infringement and Fair Use, 26 {SMU} Sci. \& Tech. L. Rev. 259 (2023){\textless}br{\textgreater}-------------------------------------------------------------------------------------------------------------{\textless}br{\textgreater}The discussion of {AI} copyright infringement or fair use often skips over all of the required steps of the infringement analysis in order to focus on the most intriguing question, “Could a visual generative {AI} generate a work that potentially infringes a preexisting copyrighted work?” and then the discussion skips further ahead to, “Would the {AI} have a fair use defense, most likely under the transformative test?” These are relevant questions, but in isolation from the actual steps of the copyright infringement analysis, the discussion is misleading or even irrelevant. This skipping of topics and stages of the infringement analysis does not train our attention to a properly accused party or entity whose actions prompt the question. The leaping from a question of infringement in the creation of training datasets to the creation of foundation models that draw from the training data to the actual operation of the generative {AI} system to produce images makes a false equivalency regarding the processes themselves and the persons responsible for them. 
The questions ought to shift focus from the persons compiling the training dataset used to train the {AI} system and the designers and creators of the {AI} system itself to the end users of the {AI} system who actually conceive of and cause the creation of images. {\textless}br{\textgreater}{\textless}br{\textgreater}The analysis of infringement or fair use in the generative {AI} context has suffered from widespread misunderstanding concerning the generative {AI} processes and the control and authorship of the end-user. Claimants, commentators, and regulators have made incorrect assumptions and inaccurate simplifications concerning the process, which I refer to as the Magic File Drawer theory, the Magic Copy Machine theory, and the Magic Box Artist theory. These theories, if they were true, would be much easier to envision and understand than the actual science and technology that goes into the creation and operation of a contemporary visual generative {AI} system. Throughout this Article, I will attempt to clarify and correct the understanding of the science and technology of the generative {AI} processes and explain the different roles of the training dataset designers, the generative {AI} system designers, and the end-users in the rendering of visual works by a generative {AI} system. {\textless}br{\textgreater}{\textless}br{\textgreater}Part {II} will discuss the requirements of a claim of copyright infringement including each step from the copyrightability of the claimant’s work, the doctrines that limit copyrightability, the requirement of an act of copying, and the infringement elements. {\textless}br{\textgreater}{\textless}br{\textgreater}Part {III} will summarize the copyright fair use test paying particular attention to the purpose and character of the use analysis, 17 U.S.C. § 107(1), and the current interpretation of the “transformative” test after Andy Warhol Foundation v. Goldsmith, particularly in circumstances relating to technology and the use of copyrighted or copyrightable data sources. {\textless}br{\textgreater}{\textless}br{\textgreater}Part {IV} will analyze potential infringement or fair use by the creators of generative {AI} training datasets. {\textless}br{\textgreater}{\textless}br{\textgreater}Part V will analyze potential infringement or fair use by the creators of visual generative {AI} systems. {\textless}br{\textgreater}{\textless}br{\textgreater}Part {VI} will analyze potential infringement or fair use by the end-users of visual generative {AI} systems.{\textless}br{\textgreater}{\textless}br{\textgreater}For all their complexity, visual generative {AI} systems are tools that depend on an end-user who conceives of and designs the image and provides the system with a prompt to set the generative process in motion. The end-users are responsible for crafting the prompt or series of prompts used, for evaluating the outputs of the generative {AI}, for adjusting and editing the iterations of images offered by the {AI} system, and ultimately for selecting and adopting one of the images generated by the {AI} as the final image. The end-users then make further decisions about the actual use and its function and purpose for the images the end-users selected and adopted from the outputs of the {AI}. 
In the course of working with the {AI} tool to try to produce a certain image, an end-user might steer the system to produce a work that could, under an infringement analysis, be regarded as potentially infringing, which would lead us again to the fair use analysis based on the end-user’s use of the image.}, number = {4483539}, publisher = {Social Science Research Network}, author = {Murray, Michael D.}, urldate = {2025-01-26}, date = {2023-08-25}, langid = {english}, keywords = {fair use, {AI}, machine learning, artificial intelligence, copyright, derivative work, diffusion, foundation model, generative {AI}, generative pretrained transformer, infringement, latent space, prompt engineering, training data, transformative}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\ZQLLS25L\\Murray - 2023 - Generative AI Art Copyright Infringement and Fair Use.pdf:application/pdf}, } @online{noauthor_regulation_nodate, title = {Regulation - {EU} - 2024/1689 - {EN} - {EUR}-Lex}, url = {https://eur-lex.europa.eu/eli/reg/2024/1689/oj/eng}, urldate = {2025-01-26}, langid = {english}, note = {Doc {ID}: 32024R1689 Doc Sector: 3 Doc Title: Regulation ({EU}) 2024/1689 of the European Parliament and of the Council of 13 June 2024 laying down harmonised rules on artificial intelligence and amending Regulations ({EC}) No 300/2008, ({EU}) No 167/2013, ({EU}) No 168/2013, ({EU}) 2018/858, ({EU}) 2018/1139 and ({EU}) 2019/2144 and Directives 2014/90/{EU}, ({EU}) 2016/797 and ({EU}) 2020/1828 (Artificial Intelligence Act) (Text with {EEA} relevance) Doc Type: R Usr\_lan: en}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\HT9EP4MC\\eng.html:text/html}, } @online{young_harvards_nodate, title = {Harvard’s Library Innovation Lab launches initiative to use public domain data to train artificial intelligence}, url = {https://hls.harvard.edu/today/harvards-library-innovation-lab-launches-initiative-to-use-public-domain-data-to-train-artificial-intelligence/}, abstract = {The new program aims to make public domain materials housed at Harvard Law School Library and other knowledge institutions available to train {AI}.}, titleaddon = {Harvard Law School}, author = {Young, Scott}, urldate = {2025-01-26}, langid = {english}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\4KYWMI6T\\harvards-library-innovation-lab-launches-initiative-to-use-public-domain-data-to-train-artifici.html:text/html}, } @online{noauthor_lintelligenza_nodate, title = {L’Intelligenza Artificiale {\textbar} {AUT}-Autori Federazione}, url = {https://www.aut-autori.it/lintelligenza-artificiale/}, titleaddon = {Federazione degli Autori di Letteratura, Cinema \& Audiovisivo, Teatro}, urldate = {2025-01-26}, langid = {italian}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\HL93Q4YJ\\lintelligenza-artificiale.html:text/html}, } @article{biswas_guardrails_2023, title = {Guardrails for trust, safety, and ethical development and deployment of Large Language Models ({LLM})}, volume = {4}, issn = {2582-6921}, url = {https://thesciencebrigade.com/jst/article/view/245}, doi = {10.55662/JST.2023.4605}, abstract = {The {AI} era has ushered in Large Language Models ({LLM}) to the technological forefront, which has been much of the talk in 2023, and is likely to remain as such for many years to come. {LLMs} are the {AI} models that are the power house behind generative {AI} applications such as {ChatGPT}. 
These {AI} models, fueled by vast amounts of data and computational prowess, have unlocked remarkable capabilities, from human-like text generation to assisting with natural language understanding ({NLU}) tasks. They have quickly become the foundation upon which countless applications and software services are being built, or at least being augmented with. However, as with any groundbreaking innovations, the rise of {LLMs} brings forth critical safety, privacy, and ethical concerns. These models are found to have a propensity to leak private information, produce false information, and can be coerced into generating content that can be used for nefarious purposes by bad actors, or even by regular users unknowingly. Implementing safeguards and guardrailing techniques is imperative for applications to ensure that the content generated by {LLMs} are safe, secure, and ethical. Thus, frameworks to deploy mechanisms that prevent misuse of these models via application implementations is imperative. In this study, we propose a Flexible Adaptive Sequencing mechanism with trust and safety modules, that can be used to implement safety guardrails for the development and deployment of {LLMs}.}, pages = {55--82}, number = {6}, journaltitle = {Journal of Science \& Technology}, author = {Biswas, Anjanava and Talukdar, Wrick}, urldate = {2025-01-26}, date = {2023-11-01}, langid = {english}, keywords = {large language models, ethical {AI}, guardrails, language model safety, trust and safety}, } @article{floridi_unified_2019, title = {A Unified Framework of Five Principles for {AI} in Society}, url = {https://hdsr.mitpress.mit.edu/pub/l0jsh9d1}, doi = {10.1162/99608f92.8cd550d1}, abstract = {Artificial Intelligence ({AI}) is already having a major impact on society. As a result, many organizations have launched a wide range of initiatives to establish ethical principles for the adoption of socially beneficial {AI}. Unfortunately, the sheer volume of proposed principles threatens to overwhelm and confuse. How might this problem of ‘principle proliferation’ be solved? In this paper, we report the results of a fine-grained analysis of several of the highest-profile sets of ethical principles for {AI}. We assess whether these principles converge upon a set of agreed-upon principles, or diverge, with significant disagreement over what constitutes ‘ethical {AI}.’ Our analysis finds a high degree of overlap among the sets of principles we analyze. We then identify an overarching framework consisting of five core principles for ethical {AI}. Four of them are core principles commonly used in bioethics: beneficence, non-maleficence, autonomy, and justice. On the basis of our comparative analysis, we argue that a new principle is needed in addition: explicability, understood as incorporating both the epistemological sense of intelligibility (as an answer to the question ‘how does it work?’) and in the ethical sense of accountability (as an answer to the question: ‘who is responsible for the way it works?’). 
In the ensuing discussion, we note the limitations and assess the implications of this ethical framework for future efforts to create laws, rules, technical standards, and best practices for ethical {AI} in a wide range of contexts.}, keywords = {Accountability, Autonomy, Artificial Intelligence, Beneficence, Ethics, Explicability, Fairness, Intelligibility, Justice, Non-maleficence}, journaltitle = {Harvard Data Science Review}, shortjournal = {Harvard Data Science Review}, author = {Floridi, Luciano and Cowls, Josh}, urldate = {2025-01-26}, date = {2019-06-23}, langid = {english}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\UCKL97PP\\Floridi e Cowls - 2019 - A Unified Framework of Five Principles for AI in Society.pdf:application/pdf}, } @online{vetere_minerva_2024, title = {Minerva, l’{IA} italiana al bivio tra Vannacci e Manzoni}, url = {https://ilmanifesto.it/minerva-lia-italiana-al-bivio-tra-vannacci-e-manzoni}, abstract = {Big Data e {IA} (Scuola) Il primo Language Model "italiano" sviluppato dall'Università Sapienza genera testi "tossici", non moderati, simili a quelli del più becero senso comune. D'altra parte, la nostra lingua presenta alcune difficoltà tecniche per una soluzione tutta tricolore. Di Guido Vetere}, titleaddon = {il manifesto}, author = {Vetere, Guido}, urldate = {2025-01-26}, date = {2024-05-13}, langid = {italian}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\VFL8297F\\minerva-lia-italiana-al-bivio-tra-vannacci-e-manzoni.html:text/html}, } @inproceedings{zhang_jailbreak_2024, location = {Bangkok, Thailand}, title = {Jailbreak Open-Sourced Large Language Models via Enforced Decoding}, url = {https://aclanthology.org/2024.acl-long.299/}, doi = {10.18653/v1/2024.acl-long.299}, abstract = {Large Language Models ({LLMs}) have achieved unprecedented performance in Natural Language Generation ({NLG}) tasks. However, many existing studies have shown that they could be misused to generate undesired content. In response, before releasing {LLMs} for public access, model developers usually align those language models through Supervised Fine-Tuning ({SFT}) or Reinforcement Learning with Human Feedback ({RLHF}). Consequently, those aligned large language models refuse to generate undesired content when facing potentially harmful/unethical requests. A natural question is “could alignment really prevent those open-sourced large language models from being misused to generate undesired content?”. In this work, we provide a negative answer to this question. In particular, we show those open-sourced, aligned large language models could be easily misguided to generate undesired content without heavy computations or careful prompt designs. Our key idea is to directly manipulate the generation process of open-sourced {LLMs} to misguide it to generate undesired content including harmful or biased information and even private data. 
We evaluate our method on 4 open-sourced {LLMs} accessible publicly and our finding highlights the need for more advanced mitigation strategies for open-sourced {LLMs}.}, eventtitle = {{ACL} 2024}, pages = {5475--5493}, booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, publisher = {Association for Computational Linguistics}, author = {Zhang, Hangfan and Guo, Zhimeng and Zhu, Huaisheng and Cao, Bochuan and Lin, Lu and Jia, Jinyuan and Chen, Jinghui and Wu, Dinghao}, editor = {Ku, Lun-Wei and Martins, Andre and Srikumar, Vivek}, urldate = {2025-01-26}, date = {2024-08}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\9VP77BWJ\\Zhang et al. - 2024 - Jailbreak Open-Sourced Large Language Models via Enforced Decoding.pdf:application/pdf}, } @book{floridi_etica_2022, location = {Milano}, edition = {Prima edizione}, title = {Etica dell'intelligenza artificiale: sviluppi, opportunità, sfide}, isbn = {978-88-3285-409-1}, series = {Scienza e idee}, shorttitle = {Etica dell'intelligenza artificiale}, pagetotal = {384}, number = {340}, publisher = {Raffaello Cortina Editore}, author = {Floridi, Luciano}, editor = {Durante, Massimo}, date = {2022}, } @inproceedings{buolamwini_gender_2018, title = {Gender Shades: Intersectional Accuracy Disparities in Commercial Gender Classification}, url = {https://proceedings.mlr.press/v81/buolamwini18a.html}, shorttitle = {Gender Shades}, abstract = {Recent studies demonstrate that machine learning algorithms can discriminate based on classes like race and gender. In this work, we present an approach to evaluate bias present in automated facial analysis algorithms and datasets with respect to phenotypic subgroups. Using the dermatologist approved Fitzpatrick Skin Type classification system, we characterize the gender and skin type distribution of two facial analysis benchmarks, {IJB}-A and Adience. We find that these datasets are overwhelmingly composed of lighter-skinned subjects (79.6\% for {IJB}-A and 86.2\% for Adience) and introduce a new facial analysis dataset which is balanced by gender and skin type. We evaluate 3 commercial gender classification systems using our dataset and show that darker-skinned females are the most misclassified group (with error rates of up to 34.7\%). The maximum error rate for lighter-skinned males is 0.8\%. 
The substantial disparities in the accuracy of classifying darker females, lighter females, darker males, and lighter males in gender classification systems require urgent attention if commercial companies are to build genuinely fair, transparent and accountable facial analysis algorithms.}, eventtitle = {Conference on Fairness, Accountability and Transparency}, pages = {77--91}, booktitle = {Proceedings of the 1st Conference on Fairness, Accountability and Transparency}, publisher = {{PMLR}}, author = {Buolamwini, Joy and Gebru, Timnit}, urldate = {2025-01-26}, date = {2018-01-21}, langid = {english}, note = {{ISSN}: 2640-3498}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\KRTXC9HB\\Buolamwini e Gebru - 2018 - Gender Shades Intersectional Accuracy Disparities in Commercial Gender Classification.pdf:application/pdf;Supplementary PDF:C\:\\Users\\moles\\Zotero\\storage\\Y57YAK43\\Buolamwini e Gebru - 2018 - Gender Shades Intersectional Accuracy Disparities in Commercial Gender Classification.pdf:application/pdf}, } @online{harding_facial_2023, title = {Facial Recognition Bias: Why Racism Appears In Face Detection Tech}, url = {https://foundation.mozilla.org/en/blog/facial-recognition-bias/}, shorttitle = {Facial Recognition Bias}, abstract = {Facial recognition tech routinely performs worse when attempting to detect Black and brown faces. Why do face detection tools struggle with dark skin?}, titleaddon = {Mozilla Foundation}, author = {Harding, Xavier}, urldate = {2025-01-26}, date = {2023-08-07}, langid = {english}, note = {Section: Advocacy}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\3MFUNJ38\\facial-recognition-bias.html:text/html}, } @online{field_openai_2024, title = {{OpenAI} sees roughly \$5 billion loss this year on \$3.7 billion in revenue}, url = {https://www.cnbc.com/2024/09/27/openai-sees-5-billion-loss-this-year-on-3point7-billion-in-revenue.html}, abstract = {{CNBC} has confirmed that {OpenAI} expects about \$5 billion in losses on \$3.7 billion in revenue this year — figures first reported by The New York Times.}, titleaddon = {{CNBC}}, author = {Field, Hayden}, urldate = {2025-01-26}, date = {2024-09-27}, langid = {english}, note = {Section: Technology}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\LARLRJ8D\\openai-sees-5-billion-loss-this-year-on-3point7-billion-in-revenue.html:text/html}, } @article{barassi_toward_2024, title = {Toward a Theory of {AI} Errors: Making Sense of Hallucinations, Catastrophic Failures, and the Fallacy of Generative {AI}}, issn = {2644-2353, 2688-8513}, url = {https://hdsr.mitpress.mit.edu/pub/1yo82mqa/release/2}, doi = {10.1162/99608f92.ad8ebbd4}, shorttitle = {Toward a Theory of {AI} Errors}, abstract = {The rise of generative {AI} confronts us with new and key questions about {AI} failure, and how we make sense of and learn how to coexist with it. While computer scientists understand {AI} failure as something that we can learn from and predict, in this article I argue that we need to understand {AI} failure as a complex social reality that is defined by the interconnection between our data, technological design, and structural inequalities by processes of commodification and by everyday political and social conflicts. Yet I also show that to make sense of the complexity of {AI} failure we need a theory of {AI} errors. 
Bringing philosophical approaches to error theory together with anthropological perspectives, I argue that a theory of error is essential because it sheds light on the fact that the failures in our systems derive from processes of erroneous knowledge production, from mischaracterizations and flawed cognitive relations. A theory of {AI} errors, therefore, ultimately confronts us with the question about what types of cognitive relations and judgments define our {AI} systems, and sheds light on their deep-seeded limitations when it comes to making sense of our social worlds and human life.}, issue = {Special Issue 5}, journaltitle = {Harvard Data Science Review}, author = {Barassi, Veronica}, urldate = {2025-01-26}, date = {2024-11-25}, langid = {english}, note = {Publisher: The {MIT} Press}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\3HTJS2RS\\Barassi - 2024 - Toward a Theory of AI Errors Making Sense of Hallucinations, Catastrophic Failures, and the Fallacy.pdf:application/pdf}, } @misc{bastani_generative_2024, location = {Rochester, {NY}}, title = {Generative {AI} Can Harm Learning}, url = {https://papers.ssrn.com/abstract=4895486}, doi = {10.2139/ssrn.4895486}, abstract = {Generative artificial intelligence ({AI}) is poised to revolutionize how humans work, and has already demonstrated promise in significantly improving human productivity. However, a key remaining question is how generative {AI} affects learning, namely, how humans acquire new skills as they perform tasks. This kind of skill learning is critical to long-term productivity gains, especially in domains where generative {AI} is fallible and human experts must check its outputs. We study the impact of generative {AI}, specifically {OpenAI}'s {GPT}-4, on human learning in the context of math classes at a high school. In a field experiment involving nearly a thousand students, we have deployed and evaluated two {GPT} based tutors, one that mimics a standard {ChatGPT} interface (called {GPT} Base) and one with prompts designed to safeguard learning (called {GPT} Tutor). These tutors comprise about 15\% of the curriculum in each of three grades. Consistent with prior work, our results show that access to {GPT}-4 significantly improves performance (48\% improvement for {GPT} Base and 127\% for {GPT} Tutor). However, we additionally find that when access is subsequently taken away, students actually perform worse than those who never had access (17\% reduction for {GPT} Base). That is, access to {GPT}-4 can harm educational outcomes. These negative learning effects are largely mitigated by the safeguards included in {GPT} Tutor. Our results suggest that students attempt to use {GPT}-4 as a "crutch" during practice problem sessions, and when successful, perform worse on their own. Thus, to maintain long-term productivity, we must be cautious when deploying generative {AI} to ensure humans continue to learn critical skills. * {HB}, {OB}, and {AS} contributed equally}, number = {4895486}, publisher = {Social Science Research Network}, author = {Bastani, Hamsa and Bastani, Osbert and Sungu, Alp and Ge, Haosen and Kabakcı, Özge and Mariman, Rei}, urldate = {2025-01-26}, date = {2024-07-15}, langid = {english}, keywords = {Education, Generative {AI}, Human Capital Development, Human-{AI} Collaboration, Large Language Models}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\JCAYVPXI\\Bastani et al. 
- 2024 - Generative AI Can Harm Learning.pdf:application/pdf}, } @article{ji_survey_2023, title = {Survey of Hallucination in Natural Language Generation}, volume = {55}, issn = {0360-0300}, url = {https://doi.org/10.1145/3571730}, doi = {10.1145/3571730}, abstract = {Natural Language Generation ({NLG}) has improved exponentially in recent years thanks to the development of sequence-to-sequence deep learning technologies such as Transformer-based language models. This advancement has led to more fluent and coherent {NLG}, leading to improved development in downstream tasks such as abstractive summarization, dialogue generation, and data-to-text generation. However, it is also apparent that deep learning based generation is prone to hallucinate unintended text, which degrades the system performance and fails to meet user expectations in many real-world scenarios. To address this issue, many studies have been presented in measuring and mitigating hallucinated texts, but these have never been reviewed in a comprehensive manner before. In this survey, we thus provide a broad overview of the research progress and challenges in the hallucination problem in {NLG}. The survey is organized into two parts: (1) a general overview of metrics, mitigation methods, and future directions, and (2) an overview of task-specific research progress on hallucinations in the following downstream tasks, namely abstractive summarization, dialogue generation, generative question answering, data-to-text generation, and machine translation. This survey serves to facilitate collaborative efforts among researchers in tackling the challenge of hallucinated texts in {NLG}.}, pages = {248:1--248:38}, number = {12}, journaltitle = {{ACM} Comput. Surv.}, author = {Ji, Ziwei and Lee, Nayeon and Frieske, Rita and Yu, Tiezheng and Su, Dan and Xu, Yan and Ishii, Etsuko and Bang, Ye Jin and Madotto, Andrea and Fung, Pascale}, urldate = {2025-01-26}, date = {2023-03-03}, file = {Versione inviata:C\:\\Users\\moles\\Zotero\\storage\\8SC8JT7L\\Ji et al. 
- 2023 - Survey of Hallucination in Natural Language Generation.pdf:application/pdf}, } @article{seo_impact_2021, title = {The impact of artificial intelligence on learner–instructor interaction in online learning}, volume = {18}, issn = {2365-9440}, url = {https://doi.org/10.1186/s41239-021-00292-9}, doi = {10.1186/s41239-021-00292-9}, abstract = {Artificial intelligence ({AI}) systems offer effective support for online learning and teaching, including personalizing learning for students, automating instructors’ routine tasks, and powering adaptive assessments. However, while the opportunities for {AI} are promising, the impact of {AI} systems on the culture of, norms in, and expectations about interactions between students and instructors are still elusive. In online learning, learner–instructor interaction (inter alia, communication, support, and presence) has a profound impact on students’ satisfaction and learning outcomes. Thus, identifying how students and instructors perceive the impact of {AI} systems on their interaction is important to identify any gaps, challenges, or barriers preventing {AI} systems from achieving their intended potential and risking the safety of these interactions. To address this need for forward-looking decisions, we used Speed Dating with storyboards to analyze the authentic voices of 12 students and 11 instructors on diverse use cases of possible {AI} systems in online learning. Findings show that participants envision adopting {AI} systems in online learning can enable personalized learner–instructor interaction at scale but at the risk of violating social boundaries. Although {AI} systems have been positively recognized for improving the quantity and quality of communication, for providing just-in-time, personalized support for large-scale settings, and for improving the feeling of connection, there were concerns about responsibility, agency, and surveillance issues. These findings have implications for the design of {AI} systems to ensure explainability, human-in-the-loop, and careful data collection and presentation. Overall, contributions of this study include the design of {AI} system storyboards which are technically feasible and positively support learner–instructor interaction, capturing students’ and instructors’ concerns of {AI} systems through Speed Dating, and suggesting practical implications for maximizing the positive impact of {AI} systems while minimizing the negative ones.}, pages = {54}, number = {1}, journaltitle = {International Journal of Educational Technology in Higher Education}, shortjournal = {International Journal of Educational Technology in Higher Education}, author = {Seo, Kyoungwon and Tang, Joice and Roll, Ido and Fels, Sidney and Yoon, Dongwook}, urldate = {2025-01-26}, date = {2021-10-26}, keywords = {Artificial intelligence, Boundary, Learner–instructor interaction, Online learning, Speed dating}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\XQ84QVVG\\Seo et al. 
- 2021 - The impact of artificial intelligence on learner–instructor interaction in online learning.pdf:application/pdf;Snapshot:C\:\\Users\\moles\\Zotero\\storage\\CNK96ZI3\\s41239-021-00292-9.html:text/html}, } @article{stadler_cognitive_2024, title = {Cognitive ease at a cost: {LLMs} reduce mental effort but compromise depth in student scientific inquiry}, volume = {160}, issn = {0747-5632}, url = {https://www.sciencedirect.com/science/article/pii/S0747563224002541}, doi = {10.1016/j.chb.2024.108386}, shorttitle = {Cognitive ease at a cost}, abstract = {This study explores the cognitive load and learning outcomes associated with using large language models ({LLMs}) versus traditional search engines for information gathering during learning. A total of 91 university students were randomly assigned to either use {ChatGPT}3.5 or Google to research the socio-scientific issue of nanoparticles in sunscreen to derive valid recommendations and justifications. The study aimed to investigate potential differences in cognitive load, as well as the quality and homogeneity of the students' recommendations and justifications. Results indicated that students using {LLMs} experienced significantly lower cognitive load. However, despite this reduction, these students demonstrated lower-quality reasoning and argumentation in their final recommendations compared to those who used traditional search engines. Further, the homogeneity of the recommendations and justifications did not differ significantly between the two groups, suggesting that {LLMs} did not restrict the diversity of students’ perspectives. These findings highlight the nuanced implications of digital tools on learning, suggesting that while {LLMs} can decrease the cognitive burden associated with information gathering during a learning task, they may not promote deeper engagement with content necessary for high-quality learning per se.}, pages = {108386}, journaltitle = {Computers in Human Behavior}, shortjournal = {Computers in Human Behavior}, author = {Stadler, Matthias and Bannert, Maria and Sailer, Michael}, urldate = {2025-01-26}, date = {2024-11-01}, file = {Full text:C\:\\Users\\moles\\Zotero\\storage\\MXF3T8WJ\\Stadler et al. - 2024 - Cognitive ease at a cost LLMs reduce mental effort but compromise depth in student scientific inqui.pdf:application/pdf;ScienceDirect Snapshot:C\:\\Users\\moles\\Zotero\\storage\\J5IJIFSW\\S0747563224002541.html:text/html}, } @article{trisnawati_impact_2023, title = {The Impact of Artificial Intelligent in Education toward 21st Century Skills: A Literature Review}, volume = {2}, rights = {Copyright (c) 2023 Winda Trisnawati, Randi Eka Putra, Levandra Balti}, issn = {2829-5196}, url = {https://ejournal.ppsdp.org/index.php/pijed/article/view/152}, doi = {10.59175/pijed.v2i2.152}, shorttitle = {The Impact of Artificial Intelligent in Education toward 21st Century Skills}, abstract = {This study aims to identify the impact of Artificial Intelligent in Education ({AIED}) toward 21st century skills. The method was a literature review. The research question was “What is the impact of artificial intelligent in education toward 21st century skills?”. The selection and categorization of literature were carried out in the following procedures. The academic databases used to collect papers were Science Direct, Scopus, and Google Scholar.\  ("artificial intelligence" {OR} "{AI}" {OR} "{AIED}") {AND} ("21st century skills" {OR} "6C"). The time period under review was mainly from 2013 to 2023. 
It used {MAXDQA} for thematic analysis. The result shows that artificial intelligent in education have impact toward 21st century skills (6C); character, citizenship, critical thinking, creativity, collaboration, and communication. Various educational and learning issues are addressed using {AI} techniques. Learning is supported by {AI}, and students collaborate with {AI}. By collaborating students and {AI}, it gives the positive and negative impact toward students’ 21st century skills. the impact of {AIED} is indeed able to facilitate students’ problems in learning while the ability of students in critical thinking, creative thinking, and character will decrease.}, pages = {501--513}, number = {2}, journaltitle = {{PPSDP} International Journal of Education}, author = {Trisnawati, Winda and Putra, Randi Eka and Balti, Levandra}, urldate = {2025-01-26}, date = {2023-11-10}, langid = {english}, note = {Number: 2}, keywords = {21st Century Skills}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\WYZ5JMXH\\Trisnawati et al. - 2023 - The Impact of Artificial Intelligent in Education toward 21st Century Skills A Literature Review.pdf:application/pdf}, } @article{weiser_heres_2023, title = {Here’s What Happens When Your Lawyer Uses {ChatGPT}}, issn = {0362-4331}, url = {https://www.nytimes.com/2023/05/27/nyregion/avianca-airline-lawsuit-chatgpt.html}, abstract = {A lawyer representing a man who sued an airline relied on artificial intelligence to help prepare a court filing. It did not go well.}, journaltitle = {The New York Times}, author = {Weiser, Benjamin}, urldate = {2025-01-26}, date = {2023-05-27}, langid = {american}, keywords = {{ChatGPT}, Artificial Intelligence, Avianca, internal-sub-only-except-search, Legal Profession, Manhattan ({NYC}), Rumors and Misinformation, Suits and Litigation (Civil)}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\IZ592VT3\\avianca-airline-lawsuit-chatgpt.html:text/html}, } @book{mollick_co-intelligence_2024, location = {New York}, title = {Co-intelligence: living and working with {AI}}, isbn = {978-0-593-71671-7 978-0-593-85250-7}, shorttitle = {Co-intelligence}, abstract = {"From Wharton professor and author of the popular One Useful Thing Substack newsletter Ethan Mollick comes the definitive playbook for working, learning, and living in the new age of {AI}. The release of generative {AI}--from {LLMs} like {ChatGPT} to image generators like {DALL}-E-marks a new era. We have invented technologies that boost our physical capabilities and others that automate complex tasks, but never, until now, have we created a technology that can boost our intelligence--with an impact on work and life that researchers project will be greater than that of steam power or the internet. Mollick urges us not to turn away from {AI}, and instead to invite {AI} tools to the table. He demonstrates how {AI} can amplify our own capacities, acting in roles from brainstorming partner to cowriter to tutor to coach, and assesses its surprising, positive impact on business and organizations. Marshalling original research from workers and teams who are leading the rest of us in embracing and leveraging {AI}, Mollick cuts through the hype to make a frank and eye-opening case for the real value of {AI} tools. Moreover, Mollick argues that the long-term impact of {AI} will be different from what we expect, advantaging English majors and art history experts more than coders, and impacting knowledge workers more than blue-collar workers. 
Co-Intelligence shows what it means for individuals and for society to think together with smart machines, and why it's imperative that we all master that skill. Co-Intelligence challenges us to utilize {AI}'s power without losing our identity, learn from it without being misled, and harness its gifts to create a better human future. Thought-provoking, optimistic, and lucid, Co-Intelligence reveals the promise and power of generative {AI}"--}, pagetotal = {234}, publisher = {Portfolio/Penguin}, author = {Mollick, Ethan}, date = {2024}, file = {Table of Contents PDF:C\:\\Users\\moles\\Zotero\\storage\\UIB5XG7B\\Mollick - 2024 - Co-intelligence living and working with AI.pdf:application/pdf}, } @article{weber-wulff_testing_2023, title = {Testing of Detection Tools for {AI}-Generated Text}, volume = {19}, issn = {1833-2595}, url = {http://arxiv.org/abs/2306.15666}, doi = {10.1007/s40979-023-00146-z}, abstract = {Recent advances in generative pre-trained transformer large language models have emphasised the potential risks of unfair use of artificial intelligence ({AI}) generated content in an academic environment and intensified efforts in searching for solutions to detect such content. The paper examines the general functionality of detection tools for artificial intelligence generated text and evaluates them based on accuracy and error type analysis. Specifically, the study seeks to answer research questions about whether existing detection tools can reliably differentiate between human-written text and {ChatGPT}-generated text, and whether machine translation and content obfuscation techniques affect the detection of {AI}-generated text. The research covers 12 publicly available tools and two commercial systems (Turnitin and {PlagiarismCheck}) that are widely used in the academic setting. The researchers conclude that the available detection tools are neither accurate nor reliable and have a main bias towards classifying the output as human-written rather than detecting {AI}-generated text. Furthermore, content obfuscation techniques significantly worsen the performance of tools. The study makes several significant contributions. First, it summarises up-to-date similar scientific and non-scientific efforts in the field. Second, it presents the result of one of the most comprehensive tests conducted so far, based on a rigorous research methodology, an original document set, and a broad coverage of tools. Third, it discusses the implications and drawbacks of using detection tools for {AI}-generated text in academic settings.}, pages = {26}, number = {1}, journaltitle = {International Journal for Educational Integrity}, shortjournal = {Int J Educ Integr}, author = {Weber-Wulff, Debora and Anohina-Naumeca, Alla and Bjelobaba, Sonja and Foltýnek, Tomáš and Guerrero-Dib, Jean and Popoola, Olumide and Šigut, Petr and Waddington, Lorna}, urldate = {2025-01-26}, date = {2023-12-25}, eprinttype = {arxiv}, eprint = {2306.15666 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Computers and Society}, file = {Preprint PDF:C\:\\Users\\moles\\Zotero\\storage\\3UKJ9SEI\\Weber-Wulff et al. 
- 2023 - Testing of Detection Tools for AI-Generated Text.pdf:application/pdf;Snapshot:C\:\\Users\\moles\\Zotero\\storage\\QT8SXR3P\\2306.html:text/html}, } @article{zaitsu_distinguishing_2023, title = {Distinguishing {ChatGPT}(-3.5, -4)-generated and human-written papers through Japanese stylometric analysis}, volume = {18}, issn = {1932-6203}, url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0288453}, doi = {10.1371/journal.pone.0288453}, abstract = {In the first half of 2023, text-generative artificial intelligence ({AI}), including {ChatGPT} from {OpenAI}, has attracted considerable attention worldwide. In this study, first, we compared Japanese stylometric features of texts generated by {ChatGPT}, equipped with {GPT}-3.5 and {GPT}-4, and those written by humans. In this work, we performed multi-dimensional scaling ({MDS}) to confirm the distributions of 216 texts of three classes (72 academic papers written by 36 single authors, 72 texts generated by {GPT}-3.5, and 72 texts generated by {GPT}-4 on the basis of the titles of the aforementioned papers) focusing on the following stylometric features: (1) bigrams of parts-of-speech, (2) bigram of postpositional particle words, (3) positioning of commas, and (4) rate of function words. {MDS} revealed distinct distributions at each stylometric feature of {GPT} (3.5 and 4) and human. Although {GPT}-4 is more powerful than {GPT}-3.5 because it has more parameters, both {GPT} (3.5 and 4) distributions are overlapping. These results indicate that although the number of parameters may increase in the future, {GPT}-generated texts may not be close to that written by humans in terms of stylometric features. Second, we verified the classification performance of random forest ({RF}) classifier for two classes ({GPT} and human) focusing on Japanese stylometric features. This study revealed the high performance of {RF} in each stylometric feature: The {RF} classifier focusing on the rate of function words achieved 98.1\% accuracy. Furthermore the {RF} classifier focusing on all stylometric features reached 100\% in terms of all performance indexes (accuracy, recall, precision, and F1 score). This study concluded that at this stage we human discriminate {ChatGPT} from human limited to Japanese language.}, pages = {e0288453}, number = {8}, journaltitle = {{PLOS} {ONE}}, shortjournal = {{PLOS} {ONE}}, author = {Zaitsu, Wataru and Jin, Mingzhe}, urldate = {2025-01-26}, date = {2023-08-09}, langid = {english}, note = {Publisher: Public Library of Science}, keywords = {Language, Artificial intelligence, Linguistic morphology, Machine learning, Natural language processing, Social psychology, Support vector machines, Universities}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\9NLE6RP4\\Zaitsu e Jin - 2023 - Distinguishing ChatGPT(-3.5, -4)-generated and human-written papers through Japanese stylometric ana.pdf:application/pdf}, } @article{dipardo_peer_1988, title = {Peer Response Groups in the Writing Classroom: Theoretic Foundations and New Directions}, volume = {58}, issn = {0034-6543}, url = {https://www.jstor.org/stable/1170332}, doi = {10.2307/1170332}, shorttitle = {Peer Response Groups in the Writing Classroom}, abstract = {The peer response group in which students respond to one another's writing is commonly used in the writing classroom, from kindergarten through college. 
Although enthusiastically advocated by practitioners and supported by current theories of the teaching and learning of writing, response groups are difficult to organize effectively. This review examines the pedagogical literature on response groups, places that literature in the context of current theories of the teaching and learning of writing, and then examines the small number of studies of peer response groups. Key issues include (a) the degree of teacher control over the groups and the effects of control structures, and (b) the kinds of social interactions within groups, with attention to how those interactions relate to the larger instructional context and to teaching and learning in the groups. Suggestions are made for reconceptualizing peer response to writing, with an emphasis on moving away from the teacher-initiated and controlled response group toward encouraging spontaneous peer talk during the writing process.}, pages = {119--149}, number = {2}, journaltitle = {Review of Educational Research}, author = {{DiPardo}, Anne and Freedman, Sarah Warshauer}, urldate = {2025-01-26}, date = {1988}, note = {Publisher: [Sage Publications, Inc., American Educational Research Association]}, } @article{nicol_making_2022-1, title = {Making internal feedback explicit: exploiting the multiple comparisons that occur during peer review}, volume = {47}, issn = {0260-2938}, url = {https://doi.org/10.1080/02602938.2021.1924620}, doi = {10.1080/02602938.2021.1924620}, shorttitle = {Making internal feedback explicit}, abstract = {This article explores peer review through the lens of internal feedback. It investigates the internal feedback that students generate when they compare their work with the work of peers and with comments received from peers. Inner feedback was made explicit by having students write an account of what they were learning from making these different comparisons. This allowed evaluation of the extent to which students’ self-generated feedback comments would match the feedback comments a teacher might provide, and exploration of other variables hypothesized to influence inner feedback generation. Analysis revealed that students’ self-generated feedback became more elaborate from one comparison to the next and that this, and multiple simultaneous comparisons, resulted in students’ generating feedback that not only matched the teacher’s feedback but surpassed it in powerful and productive ways. Comparisons against received peer comments added little to the feedback students had already generated from comparisons against peer works. 
The implications are that having students make explicit the internal feedback they generate not only helps them build their metacognitive knowledge and self-regulatory abilities but can also decrease teacher workload in providing comments.}, pages = {424--443}, number = {3}, journaltitle = {Assessment \& Evaluation in Higher Education}, author = {Nicol, David and {McCallum}, Suzanne}, urldate = {2025-01-26}, date = {2022-04-11}, note = {Publisher: {SRHE} Website \_eprint: https://doi.org/10.1080/02602938.2021.1924620}, keywords = {Peer review, internal feedback, self-regulation, multiple comparisons, peer feedback}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\E4VQWCSY\\Nicol e McCallum - 2022 - Making internal feedback explicit exploiting the multiple comparisons that occur during peer review.pdf:application/pdf}, } @article{krishna_kashyap_yakkala_ai-powered_2024, title = {{AI}-powered assessment tools for E-learning: Enhancing feedback and grading systems}, volume = {13}, issn = {25828266}, url = {https://wjaets.com/node/1781}, doi = {10.30574/wjaets.2024.13.1.0497}, shorttitle = {{AI}-powered assessment tools for E-learning}, abstract = {Recently, there has been increased incorporation of artificial intelligence in the education system, which has impacted the practice and use of assessment in learning. To this end, the following is a paper discussing the role of {AI} in grading, instant feedback, and personalized adaptive testing. The paper also covers the ability of {AI} to minimize bias and introduce fairness while grading, as well as the ability that comes with the mass {LMS} platforms. Moreover, this paper discusses the issues and guidelines regarding the ethical use of {AI} in education: data protection, the problem of automated prejudice in {AI}, and the replacement of human tutors. When these seemingly unimportant problems are solved, and institutions admit that ethical practices are critical, such opportunities for Artificial Intelligence can be harnessed to make learning more efficient and fairer. At last, integrating {AI} into learning benefits teachers and students: it enhances the teaching and learning processes. It creates the prospects for the future global exposition of technology with human creativity to enrich learners.}, pages = {792--802}, number = {1}, journaltitle = {World Journal of Advanced Engineering Technology and Sciences}, shortjournal = {World J. Adv. Eng. Technol. Sci.}, author = {{Krishna Kashyap Yakkala}}, urldate = {2025-01-26}, date = {2024-10-30}, } @article{borghi_manifesto_2019, title = {Un manifesto per la didattica della storia}, issn = {2704-8217}, url = {https://cris.unibo.it/handle/11585/731930}, author = {Borghi, Beatrice and Dondarini, Rolando}, urldate = {2025-01-26}, date = {2019}, note = {Accepted: 2024-05-20T07:45:52Z}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\DWRQ5DLW\\Borghi et al. 
- 2019 - Un manifesto per la didattica della storia.pdf:application/pdf}, } @article{ceccoli_prospettive_nodate, title = {Prospettive per la didattica della storia in Italia e in Europa, a cura di Valseriati Enrico}, url = {https://www.academia.edu/105166900/Prospettive_per_la_didattica_della_storia_in_Italia_e_in_Europa_a_cura_di_Valseriati_Enrico}, journaltitle = {Didactica Historica}, author = {Ceccoli, Paolo}, urldate = {2025-01-26}, langid = {english}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\2MH6CR8J\\Prospettive_per_la_didattica_della_storia_in_Italia_e_in_Europa_a_cura_di_Valseriati_Enrico.html:text/html}, } @book{panciera_manuale_2022, location = {Firenze}, title = {Manuale di didattica della storia}, isbn = {978-88-00-86248-6}, publisher = {Le Monnier Università : Mondadori Education}, author = {Panciera, Walter and Savio, Andrea}, date = {2022}, } @article{valseriati_prospettive_2019, title = {Prospettive per la didattica della storia in Italia e in Europa}, url = {https://www.torrossa.com/it/resources/an/4476998}, pages = {1--228}, author = {Valseriati, Enrico}, urldate = {2025-01-26}, date = {2019}, langid = {italian}, note = {Publisher: New Digital Press}, } @article{brusa_quale_2006, title = {Quale Storia? Intervista sulla didattica, l'insegnamento e la ricerca storiografica}, volume = {2}, issn = {1825-411X}, url = {http://storicamente.org/02intervista_brusa}, doi = {10.1473/stor327}, shorttitle = {Quale Storia?}, number = {1}, journaltitle = {Storicamente}, author = {Brusa, Antonio}, urldate = {2025-01-26}, date = {2006-12-01}, langid = {english}, note = {Publisher: Viella Editrice}, } @collection{davies_debates_2017, location = {London}, edition = {2}, title = {Debates in History Teaching}, isbn = {978-1-315-64286-4}, abstract = {Now in its second edition, Debates in History Teaching remains at the cutting edge of history education. It has been fully updated to take into account the latest developments in policy, research and professional practice. With further exploration into the major issues that history teachers encounter in their daily professional lives, it provides fresh guidance for thinking and practice for teachers within the {UK} and beyond. Written by a range of experts in history education, chapters cover all the key issues needed for clear thinking and excellent professional action. This book will enable you to reach informed judgements and argue your point of view with deeper theoretical knowledge and understanding. Debates include: What is happening today in history education? What is the purpose of history teaching? What do history teachers need to know? What are the key trends and issues in international contexts? What is the role of evidence in history teaching and learning? How should you make use of {ICT} in your lessons? Should moral learning be an aim of history education? How should history learning be assessed? 
Debates in History Teaching remains essential reading for any student or practising teacher engaged in initial training, continuing professional development or Master's-level study.}, pagetotal = {322}, publisher = {Routledge}, editor = {Davies, Ian}, date = {2017-02-16}, doi = {10.4324/9781315642864}, file = {Versione inviata:C\:\\Users\\moles\\Zotero\\storage\\TA9SQG7P\\Davies - 2017 - Debates in History Teaching.pdf:application/pdf}, } @collection{lahby_empowering_2024, location = {Boca Raton}, title = {Empowering Digital Education with {ChatGPT}: From Theoretical to Practical Applications}, isbn = {978-1-032-71635-0}, shorttitle = {Empowering Digital Education with {ChatGPT}}, abstract = {Recently, there has been a significant increase in the development and interest in applying generative {AI} across various domains, including education. The emergence of large language models ({LLMs}), such as the {ChatGPT} tool, fueled by advancements in generative {AI}, is profoundly reshaping education. The use of the {ChatGPT} tool offers personalized support, improves accessibility, and introduces innovative methods for students and educators to engage with information and learning materials. Furthermore, {ChatGPT} facilitates a wide range of language learning services, including language instruction, speech recognition, pronunciation feedback, and immersive virtual simulations for hands-on learning experiences. This book explores the transformative potential of the {ChatGPT} tool within education, shedding light on the opportunities that arise through the integration of the {ChatGPT} tool into various aspects of the learning process. It serves as a platform for the community to share cutting-edge research ideas concerning the use of the {ChatGPT} tool in digital education. Readers will discover how the {ChatGPT} tool can enhance student engagement, foster personalized learning experiences, facilitate intelligent tutoring systems, support virtual classroom interactions, and revolutionize assessment and feedback mechanisms.}, pagetotal = {302}, publisher = {Chapman and Hall/{CRC}}, editor = {Lahby, Mohamed}, date = {2024-11-12}, doi = {10.1201/9781032716350}, } @article{gorvine_teaching_1970, title = {Teaching History through Role Playing}, journaltitle = {Hist Teacher}, author = {Gorvine, Harold}, urldate = {2025-01-26}, date = {1970}, langid = {english}, note = {{ERIC} Number: {EJ}020901}, keywords = {History Instruction, Identification, Learning Activities, Role Playing, Student Experience, Teaching Methods}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\W2KWMRXC\\eric.ed.gov.html:text/html}, } @incollection{obrien_openai_2024, title = {{OpenAI} {ChatGPT} and Biased Information in Higher Education}, isbn = {978-1-032-71635-0}, abstract = {Motivated by the appearance of large language models and their sudden societal impacts—both beneficial and harmful, realized and potential—we evaluated several of them with respect to bias in its myriad forms. Bias in machine-learning models refers to their tendencies to make certain decisions more often than expected. This is a result of the text on which they were trained and, in some cases, the result of post-learning human manipulation. In the end, whether it occurs in the real world or in the machine-learning world, bias will always be a subject of discussion and debate. 
We view that debate as becoming more and more important, given the recent, unprecedented explosion of {AI}—in particular, {OpenAI} and its chatbot, {ChatGPT}—and what it might mean for the future of higher education.}, booktitle = {Empowering Digital Education with {ChatGPT}}, publisher = {Chapman and Hall/{CRC}}, author = {O'Brien, Michael J. and Alsmadi, Izzat and Bentley, R. Alexander and Tuba, Milan}, date = {2024}, note = {Num Pages: 12}, } @article{roose_can_2024, title = {Can A.I. Be Blamed for a Teen’s Suicide?}, issn = {0362-4331}, url = {https://www.nytimes.com/2024/10/23/technology/characterai-lawsuit-teen-suicide.html}, abstract = {The mother of a 14-year-old Florida boy says he became obsessed with a chatbot on Character.{AI} before his death.}, journaltitle = {The New York Times}, author = {Roose, Kevin}, urldate = {2025-01-27}, date = {2024-10-23}, langid = {american}, keywords = {Artificial Intelligence, Suits and Litigation (Civil), Character.{AI}, Children and Childhood, Computers and the Internet, De Freitas, Daniel, Garcia, Megan L, Google Inc, Loneliness, Mental Health and Disorders, Mobile Applications, Setzer, Sewell {III} (2009-24), Shazeer, Noam, Teenagers and Adolescence}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\QP8HVNCD\\characterai-lawsuit-teen-suicide.html:text/html}, } @book{reich_failure_2020, location = {Cambridge London}, title = {Failure to disrupt: why technology alone can't transform education}, isbn = {978-0-674-08904-4 978-0-674-24966-0 978-0-674-24967-7 978-0-674-24968-4}, shorttitle = {Failure to disrupt}, pagetotal = {1}, publisher = {Harvard University Press}, author = {Reich, Justin}, date = {2020}, } @article{ouyang_training_2022, title = {Training language models to follow instructions with human feedback}, volume = {35}, url = {https://proceedings.neurips.cc/paper_files/paper/2022/hash/b1efde53be364a73914f58805a001731-Abstract-Conference.html}, pages = {27730--27744}, journaltitle = {Advances in Neural Information Processing Systems}, author = {Ouyang, Long and Wu, Jeffrey and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and Schulman, John and Hilton, Jacob and Kelton, Fraser and Miller, Luke and Simens, Maddie and Askell, Amanda and Welinder, Peter and Christiano, Paul F. and Leike, Jan and Lowe, Ryan}, urldate = {2025-01-29}, date = {2022-12-06}, langid = {english}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\N4RKDSPP\\Ouyang et al. - 2022 - Training language models to follow instructions with human feedback.pdf:application/pdf}, } @misc{greshake_not_2023, title = {Not what you've signed up for: Compromising Real-World {LLM}-Integrated Applications with Indirect Prompt Injection}, url = {http://arxiv.org/abs/2302.12173}, doi = {10.48550/arXiv.2302.12173}, shorttitle = {Not what you've signed up for}, abstract = {Large Language Models ({LLMs}) are increasingly being integrated into various applications. The functionalities of recent {LLMs} can be flexibly modulated via natural language prompts. This renders them susceptible to targeted adversarial prompting, e.g., Prompt Injection ({PI}) attacks enable attackers to override original instructions and employed controls. So far, it was assumed that the user is directly prompting the {LLM}. But, what if it is not the user prompting? We argue that {LLM}-Integrated Applications blur the line between data and instructions. 
We reveal new attack vectors, using Indirect Prompt Injection, that enable adversaries to remotely (without a direct interface) exploit {LLM}-integrated applications by strategically injecting prompts into data likely to be retrieved. We derive a comprehensive taxonomy from a computer security perspective to systematically investigate impacts and vulnerabilities, including data theft, worming, information ecosystem contamination, and other novel security risks. We demonstrate our attacks' practical viability against both real-world systems, such as Bing's {GPT}-4 powered Chat and code-completion engines, and synthetic applications built on {GPT}-4. We show how processing retrieved prompts can act as arbitrary code execution, manipulate the application's functionality, and control how and if other {APIs} are called. Despite the increasing integration and reliance on {LLMs}, effective mitigations of these emerging threats are currently lacking. By raising awareness of these vulnerabilities and providing key insights into their implications, we aim to promote the safe and responsible deployment of these powerful models and the development of robust defenses that protect users and systems from potential attacks.}, number = {{arXiv}:2302.12173}, publisher = {{arXiv}}, author = {Greshake, Kai and Abdelnabi, Sahar and Mishra, Shailesh and Endres, Christoph and Holz, Thorsten and Fritz, Mario}, urldate = {2025-01-29}, date = {2023-05-05}, eprinttype = {arxiv}, eprint = {2302.12173 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Computers and Society, Computer Science - Cryptography and Security}, file = {Preprint PDF:C\:\\Users\\moles\\Zotero\\storage\\JPUT62WF\\Greshake et al. - 2023 - Not what you've signed up for Compromising Real-World LLM-Integrated Applications with Indirect Pro.pdf:application/pdf;Snapshot:C\:\\Users\\moles\\Zotero\\storage\\YFMRZB75\\2302.html:text/html}, } @online{wilison_simon_2022, title = {Simon Willison: Prompt injection}, url = {https://simonwillison.net/series/prompt-injection/}, author = {Willison, Simon}, urldate = {2025-01-29}, date = {2022}, file = {Simon Willison\: Prompt injection:C\:\\Users\\moles\\Zotero\\storage\\ITWFQJA6\\prompt-injection.html:text/html}, } @online{noauthor_stolen_nodate, title = {Stolen Creativity}, url = {https://www.goethe.de/prj/geg/en/thm/tru/25358570.html}, abstract = {The German photomedia artist Boris Eldagsen won the Sony World Photography Awards in April 2023. The unusual thing: He entered with an {AI}-generated piece and ultimately rejected the prize. Eldagsen weighs in on {AI} and creativity.}, urldate = {2025-01-30}, langid = {english}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\ZA9R8RUE\\25358570.html:text/html}, } @online{noauthor_habla_nodate, title = {Habla con Meta {AI} en {WhatsApp}}, url = {https://blog.whatsapp.com/talk-to-meta-ai-on-whatsapp}, abstract = {En Meta Connect, hoy anunciamos un nuevo conjunto de actualizaciones que te permitirán hablar con Meta {AI} en tiempo real con tu voz o enviarle fotos para editarlas. 
Con estas actualizaciones, más personas podrán explorar sus ideas, mejorar sus chats y probar cosas nuevas con mayor facilidad.}, titleaddon = {{WhatsApp}.com}, urldate = {2025-01-30}, langid = {spanish}, } @online{noauthor_como_nodate, title = {Cómo seleccionar un modelo de Meta Llama {\textbar} Servicio de ayuda de {WhatsApp}}, url = {https://faq.whatsapp.com/935316661611520/?cms_platform=android&locale=es_LA}, urldate = {2025-01-30}, } @online{noauthor_streamlit_nodate, title = {Streamlit Docs}, url = {https://docs.streamlit.io/}, urldate = {2025-01-31}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\S8I7LFFK\\docs.streamlit.io.html:text/html}, } @online{noauthor_tutorial_nodate, title = {Tutorial: inizia a utilizzare l'{API} Gemini {\textbar} Gemini {API}}, url = {https://ai.google.dev/gemini-api/docs/get-started/tutorial?hl=it}, shorttitle = {Tutorial}, abstract = {Inizia a utilizzare l'{API} Gemini e Gemini 1.5 Flash, incluse la generazione di testo e le funzionalità di chat}, titleaddon = {Google {AI} for Developers}, urldate = {2025-01-31}, langid = {italian}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\7CTQND5F\\tutorial.html:text/html}, } @online{noauthor_beautiful_nodate, title = {Beautiful Soup Documentation — Beautiful Soup 4.4.0 documentation}, url = {https://beautiful-soup-4.readthedocs.io/en/latest/}, urldate = {2025-01-31}, file = {Beautiful Soup Documentation — Beautiful Soup 4.4.0 documentation:C\:\\Users\\moles\\Zotero\\storage\\IF8V7RTZ\\latest.html:text/html}, } @online{noauthor_open_nodate, title = {The Open Source {AI} Definition – 1.0}, url = {https://opensource.org/ai/open-source-ai-definition}, abstract = {version 1.0 Preamble Why we need Open Source Artificial Intelligence ({AI}) Open Source has demonstrated that massive benefits accrue to everyone after removing the barriers to learning, using, sharing and…}, titleaddon = {Open Source Initiative}, urldate = {2025-02-01}, langid = {american}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\PUJISL86\\open-source-ai-definition.html:text/html}, } @online{noauthor_metas_2023, title = {Meta’s {LLaMa} 2 license is not Open Source}, url = {https://opensource.org/blog/metas-llama-2-license-is-not-open-source}, abstract = {Meta is lowering barriers for access to powerful {AI} systems, but unfortunately, Meta has created the misunderstanding that {LLaMa} 2 is “open source” - it is not.}, titleaddon = {Open Source Initiative}, urldate = {2025-02-01}, date = {2023-07-20}, langid = {american}, file = {Snapshot:C\:\\Users\\moles\\Zotero\\storage\\WESL7KLX\\metas-llama-2-license-is-not-open-source.html:text/html}, } @article{pimbblet_can_2024, title = {Can {ChatGPT} pass a physics degree? Making a case for reformation of assessment of undergraduate degrees}, volume = {46}, issn = {0143-0807}, url = {https://dx.doi.org/10.1088/1361-6404/ad9874}, doi = {10.1088/1361-6404/ad9874}, shorttitle = {Can {ChatGPT} pass a physics degree?}, abstract = {The emergence of conversational natural language processing models presents a significant challenge for Higher Education. In this work, we use the entirety of a {UK} Physics undergraduate ({BSc} with Honours) degree including all examinations and coursework to test if {ChatGPT} ({GPT}-4) can pass a degree. 
We adopt a ‘maximal cheating’ approach wherein we permit ourselves to modify questions for clarity, split question up into smaller sub-components, expand on answers given—especially for long form written responses, obtaining references, and use of advanced coaching, plug-ins and custom instructions to optimize outputs. In general, there are only certain parts of the degree in question where {GPT}-4 fails. Explicitly these include compulsory laboratory elements, and the final project which is assessed by a viva. If these were no issue, then {GPT}-4 would pass with a grade of an upper second class overall. In general, coding tasks are performed exceptionally well, along with simple single-step solution problems. Multiple step problems and longer prose are generally poorer along with interdisciplinary problems. We strongly suggest that there is now a necessity to urgently re-think and revise assessment practice in physics—and other disciplines—due to the existence of {AI} such as {GPT}-4. We recommend close scrutiny of assessment tasks: only invigilated in-person examinations, vivas, laboratory skills testing (or ‘performances’ in other disciplines), and presentations are not vulnerable to {GPT}-4, and urge consideration of how {AI} can be embedded within the disciplinary context.}, pages = {015702}, number = {1}, journaltitle = {European Journal of Physics}, shortjournal = {Eur. J. Phys.}, author = {Pimbblet, K A and Morrell, L J}, urldate = {2025-02-01}, date = {2024-12}, langid = {english}, note = {Publisher: {IOP} Publishing}, file = {IOP Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\JVAX3AXU\\Pimbblet e Morrell - 2024 - Can ChatGPT pass a physics degree Making a case for reformation of assessment of undergraduate degr.pdf:application/pdf}, } @inproceedings{zou_adversarial_2024, location = {Singapore}, title = {Adversarial Attacks on Large Language Models}, isbn = {978-981-97-5501-1}, doi = {10.1007/978-981-97-5501-1_7}, abstract = {Large Language Models ({LLMs}) have rapidly advanced and garnered increasing attention due to their remarkable capabilities across various applications. However, adversarial attacks pose a significant threat to {LLMs}, as prior research has demonstrated their vulnerability, resulting in prediction inaccuracies. This paper offers a foundational overview of {LLMs} and traces their developmental trajectory. We systematically classify and compare adversarial examples on {LLMs} based on their perturbation units. Additionally, we scrutinize the root causes of vulnerability and explore prevalent defense approaches tailored to mitigate adversarial attacks on {LLMs}.}, pages = {85--96}, booktitle = {Knowledge Science, Engineering and Management}, publisher = {Springer Nature}, author = {Zou, Jing and Zhang, Shungeng and Qiu, Meikang}, editor = {Cao, Cungeng and Chen, Huajun and Zhao, Liang and Arshad, Junaid and Asyhari, Taufiq and Wang, Yonghao}, date = {2024}, langid = {english}, keywords = {Adversarial Attacks, Adversarial Examples, Large Language Models ({LLMs}), Natural Language Processing, Security}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\8GES4JLT\\Zou et al. - 2024 - Adversarial Attacks on Large Language Models.pdf:application/pdf}, } @misc{cocchiaro_who_2024, location = {Rochester, {NY}}, title = {Who is an {AI} Ethicist? 
An Empirical Study of Expertise, Skills, and Profiles to Build a Competency Framework}, url = {https://papers.ssrn.com/abstract=4891907}, doi = {10.2139/ssrn.4891907}, shorttitle = {Who is an {AI} Ethicist?}, abstract = {Over the last decade the figure of the {AI} Ethicist has seen significant growth in the {ICT} market. However, only a few studies have taken an interest in this professional profile, and they have yet to provide a normative discussion of its expertise and skills. The goal of this article is to initiate such discussion. We argue that {AI} Ethicists should be experts and use a heuristic to identify them. Then, we focus on their specific kind of moral expertise, drawing on a parallel with the expertise of Ethics Consultants in clinical settings and on the bioethics literature on the topic. Finally, we highlight the differences between Health Care Ethics Consultants and {AI} Ethicists and derive the expertise and skills of the latter from the roles that {AI} Ethicists should have in an organisation.}, number = {4891907}, publisher = {Social Science Research Network}, author = {Cocchiaro, Mariangela Zoe and Morley, Jessica and Novelli, Claudio and Panai, Enrico and Tartaro, Alessio and Floridi, Luciano}, urldate = {2025-02-01}, date = {2024-07-10}, langid = {english}, keywords = {{AI} Ethicist, ethics of {AI}, expertise, practical moral expertise, skills}, file = {Full Text PDF:C\:\\Users\\moles\\Zotero\\storage\\FUVPRMK3\\Cocchiaro et al. - 2024 - Who is an AI Ethicist An Empirical Study of Expertise, Skills, and Profiles to Build a Competency F.pdf:application/pdf}, } @article{noauthor_academic_2024, title = {Academic dishonesty in higher education - a nationwide study in Taiwan}, url = {https://www.researchgate.net/publication/225487537_Academic_dishonesty_in_higher_education_-_a_nationwide_study_in_Taiwan}, doi = {10.1007/s10734-006-9047-z}, journaltitle = {{ResearchGate}}, urldate = {2025-02-01}, date = {2024-10-22}, langid = {english}, } @online{hua_beyond_nodate, title = {Beyond exams: Investigating {AI} tool impact on student attitudes, ethical awareness, and academic dishonesty in online college assessments}, url = {https://iiari.org/journal_article/beyond-exams-investigating-ai-tool-impact-on-student-attitudes-ethical-awareness-and-academic-dishonesty-in-online-college-assessments/}, shorttitle = {Beyond exams}, titleaddon = {{IIARI}}, author = {Hua, Jocelyn}, urldate = {2025-02-01}, langid = {american}, }