% Tesi/TesiZIP/Elementi esportati.bib
@article{aghaziarati_artificial_2023,
title = {Artificial Intelligence in Education: Investigating Teacher Attitudes},
volume = {1},
url = {https://journals.kmanpub.com/index.php/aitechbesosci/article/view/1973},
doi = {10.61838/kman.aitech.1.1.6},
shorttitle = {Artificial Intelligence in Education},
abstract = {This study aims to investigate teachers' attitudes towards {AI} in education, focusing on identifying the perceived benefits, challenges, and ethical considerations associated with {AI} integration into teaching and learning environments. Utilizing a qualitative research design, this study conducted semi-structured interviews with 28 educators from various educational levels and disciplines. Thematic analysis was employed to analyze the interview data, identifying key themes and concepts related to teachers' perspectives on {AI} in education. Four main themes were identified: Pedagogical Impacts, Ethical and Social Considerations, Technological Challenges and Opportunities, and Perceptions of {AI} in Education. Pedagogical Impacts encompassed enhancing learning outcomes, curriculum integration, and the evolving roles of teachers. Ethical and Social Considerations highlighted concerns over data privacy, bias, and equity. Technological Challenges and Opportunities discussed integration challenges and the future of educational technology. Lastly, Perceptions of {AI} in Education revealed varied attitudes, awareness levels, and perceived impacts on professional identity. Teachers recognize the transformative potential of {AI} in enhancing personalized learning and operational efficiency. However, concerns about ethical issues, technological infrastructure, and the need for professional development are significant. Addressing these concerns requires targeted efforts from policymakers, educational leaders, and technologists to foster a supportive environment for {AI} integration in education.},
pages = {35--42},
number = {1},
journaltitle = {{AI} and Tech in Behavioral and Social Sciences},
shortjournal = {aitechbesosci},
author = {Aghaziarati, Ali and Nejatifar, Sara and Abedi, Ahmad},
urldate = {2025-01-26},
date = {2023},
file = {Full text:files/658/Aghaziarati et al. - 2023 - Artificial Intelligence in Education Investigating Teacher Attitudes.pdf:application/pdf},
}
@article{bonsu_consumers_2023,
title = {From the Consumers' Side: Determining Students' Perception and Intention to Use {ChatGPT} in Ghanaian Higher Education},
issn = {1556-5068},
url = {https://www.ssrn.com/abstract=4387107},
doi = {10.2139/ssrn.4387107},
shorttitle = {From the Consumers' Side},
journaltitle = {{SSRN} Electronic Journal},
shortjournal = {{SSRN} Journal},
author = {Bonsu, Emmanuel and Baffour-Koduah, Daniel},
urldate = {2025-01-26},
date = {2023},
langid = {english},
}
@article{garcia_sanchez_uso_2023,
title = {Uso y percepción de {ChatGPT} en la educación superior},
volume = {11},
issn = {23870893},
url = {https://riti.es/index.php/riti/article/view/261},
doi = {10.36825/RITI.11.23.009},
abstract = {This article aims to analyze higher education students' management and perception regarding using {ChatGPT} in their academic activities. To accomplish this, a descriptive study with a qualitative approach was employed to analyze the data obtained through a questionnaire administered to students from various majors at the Universidad Autónoma de Sinaloa. The instrument consisted of two sections with closed-ended questions and a Likert scale to measure the learners' perception. The results revealed that a minority of the respondents (33\%) had used {ChatGPT} in their school practices. Additionally, it was found that a significant proportion (75\%) did not consider the use of this tool suitable for their educational tasks, and a similar percentage (79\%) did not perceive improvements in their research and data analysis skills. A low dependence on using this tool for school assignments was observed (4\%), along with a lack of confidence in teachers' preparedness to effectively incorporate this technology into their classes (83\%). In conclusion, educational institutions are recommended to carefully consider integrating artificial intelligence tools in didactic exercises, taking into account the concerns expressed by the students.},
pages = {98--107},
number = {23},
journaltitle = {Revista de Investigación en Tecnologías de la Información},
shortjournal = {{RITI}},
author = {García Sánchez, Omar Vicente},
urldate = {2025-01-26},
date = {2023-06},
file = {Full text:files/661/García Sánchez - 2023 - Uso y percepción de ChatGPT en la educación superior.pdf:application/pdf},
}
@misc{aguilar_critical_2024,
title = {Critical Thinking and Ethics in the Age of Generative {AI} in Education},
rights = {https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode},
url = {https://osf.io/7dr9j},
doi = {10.35542/osf.io/7dr9j},
abstract = {This report is an invitation for educators, policymakers, technologists, and learners to consider how generative {AI} can contribute to the future of education. It aims to lay down a foundation upon which we can start building an educational ecosystem that is dynamic, inclusive, and profoundly human, despite being significantly aided by artificial intelligence.},
publisher = {{EdArXiv}},
author = {Aguilar, Stephen J and Swartout, William and Nye, Benjamin and Sinatra, Gale Marie and Wang, Changzhao and Bui, Eric},
urldate = {2025-01-26},
date = {2024-01-29},
file = {Versione inviata:files/663/Aguilar et al. - 2024 - Critical Thinking and Ethics in the Age of Generative AI in Education.pdf:application/pdf},
}
@article{markauskaite_rethinking_2022,
title = {Rethinking the entwinement between artificial intelligence and human learning: What capabilities do learners need for a world with {AI}?},
volume = {3},
issn = {2666920X},
url = {https://linkinghub.elsevier.com/retrieve/pii/S2666920X2200011X},
doi = {10.1016/j.caeai.2022.100056},
shorttitle = {Rethinking the entwinement between artificial intelligence and human learning},
pages = {100056},
journaltitle = {Computers and Education: Artificial Intelligence},
shortjournal = {Computers and Education: Artificial Intelligence},
author = {Markauskaite, Lina and Marrone, Rebecca and Poquet, Oleksandra and Knight, Simon and Martinez-Maldonado, Roberto and Howard, Sarah and Tondeur, Jo and De Laat, Maarten and Buckingham Shum, Simon and Gašević, Dragan and Siemens, George},
urldate = {2025-01-26},
date = {2022},
langid = {english},
file = {Full text:files/665/Markauskaite et al. - 2022 - Rethinking the entwinement between artificial intelligence and human learning What capabilities do.pdf:application/pdf},
}
@misc{xu_hallucination_2024,
title = {Hallucination is Inevitable: An Innate Limitation of Large Language Models},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/2401.11817},
doi = {10.48550/ARXIV.2401.11817},
shorttitle = {Hallucination is Inevitable},
abstract = {Hallucination has been widely recognized to be a significant drawback for large language models ({LLMs}). There have been many works that attempt to reduce the extent of hallucination. These efforts have mostly been empirical so far, which cannot answer the fundamental question whether it can be completely eliminated. In this paper, we formalize the problem and show that it is impossible to eliminate hallucination in {LLMs}. Specifically, we define a formal world where hallucination is defined as inconsistencies between a computable {LLM} and a computable ground truth function. By employing results from learning theory, we show that {LLMs} cannot learn all of the computable functions and will therefore always hallucinate. Since the formal world is a part of the real world which is much more complicated, hallucinations are also inevitable for real world {LLMs}. Furthermore, for real world {LLMs} constrained by provable time complexity, we describe the hallucination-prone tasks and empirically validate our claims. Finally, using the formal world framework, we discuss the possible mechanisms and efficacies of existing hallucination mitigators as well as the practical implications on the safe deployment of {LLMs}.},
publisher = {{arXiv}},
author = {Xu, Ziwei and Jain, Sanjay and Kankanhalli, Mohan},
urldate = {2025-01-26},
date = {2024},
note = {Version Number: 1},
keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@article{chan_students_2023,
title = {Students' voices on generative {AI}: perceptions, benefits, and challenges in higher education},
volume = {20},
issn = {2365-9440},
url = {https://educationaltechnologyjournal.springeropen.com/articles/10.1186/s41239-023-00411-8},
doi = {10.1186/s41239-023-00411-8},
shorttitle = {Students' voices on generative {AI}},
abstract = {This study explores university students' perceptions of generative {AI} ({GenAI}) technologies, such as {ChatGPT}, in higher education, focusing on familiarity, their willingness to engage, potential benefits and challenges, and effective integration. A survey of 399 undergraduate and postgraduate students from various disciplines in Hong Kong revealed a generally positive attitude towards {GenAI} in teaching and learning. Students recognized the potential for personalized learning support, writing and brainstorming assistance, and research and analysis capabilities. However, concerns about accuracy, privacy, ethical issues, and the impact on personal development, career prospects, and societal values were also expressed. According to John Biggs' 3P model, student perceptions significantly influence learning approaches and outcomes. By understanding students' perceptions, educators and policymakers can tailor {GenAI} technologies to address needs and concerns while promoting effective learning outcomes. Insights from this study can inform policy development around the integration of {GenAI} technologies into higher education. By understanding students' perceptions and addressing their concerns, policymakers can create well-informed guidelines and strategies for the responsible and effective implementation of {GenAI} tools, ultimately enhancing teaching and learning experiences in higher education.},
pages = {43},
number = {1},
journaltitle = {International Journal of Educational Technology in Higher Education},
shortjournal = {Int J Educ Technol High Educ},
author = {Chan, Cecilia Ka Yuk and Hu, Wenjie},
urldate = {2025-01-26},
date = {2023-07-17},
langid = {english},
file = {Full text:files/668/Chan e Hu - 2023 - Students voices on generative AI perceptions, benefits, and challenges in higher education.pdf:application/pdf},
}
@misc{dao_investigating_2023,
title = {Investigating the Effectiveness of {ChatGPT} in Mathematical Reasoning and Problem Solving: Evidence from the Vietnamese National High School Graduation Examination},
rights = {Creative Commons Attribution Share Alike 4.0 International},
url = {https://arxiv.org/abs/2306.06331},
doi = {10.48550/ARXIV.2306.06331},
shorttitle = {Investigating the Effectiveness of {ChatGPT} in Mathematical Reasoning and Problem Solving},
abstract = {This study offers a complete analysis of {ChatGPT}'s mathematics abilities in responding to multiple-choice questions for the Vietnamese National High School Graduation Examination ({VNHSGE}) on a range of subjects and difficulty levels. The dataset included 250 questions divided into four levels: knowledge (K), comprehension (C), application (A), and high application (H), and it included ten themes that covered diverse mathematical concepts. The outcomes demonstrate that {ChatGPT}'s performance varies depending on the difficulty level and subject. It performed best on questions at Level (K), with an accuracy rate of 83\%; but, as the difficulty level rose, it scored poorly, with an accuracy rate of 10\%. The study has also shown that {ChatGPT} significantly succeeds in providing responses to questions on subjects including exponential and logarithmic functions, geometric progression, and arithmetic progression. The study found that {ChatGPT} had difficulty correctly answering questions on topics including derivatives and applications, spatial geometry, and Oxyz spatial calculus. Additionally, this study contrasted {ChatGPT} outcomes with Vietnamese students in {VNHSGE} and in other math competitions. {ChatGPT} dominated in the {SAT} Math competition with a success rate of 70\%, followed by {VNHSGE} mathematics (58.8\%). However, its success rates were lower on other exams, such as {AP} Statistics, the {GRE} Quantitative, {AMC} 10, {AMC} 12, and {AP} Calculus {BC}. These results suggest that {ChatGPT} has the potential to be an effective teaching tool for mathematics, but more work is needed to enhance its handling of graphical data and address the challenges presented by questions that are getting more challenging.},
publisher = {{arXiv}},
author = {Dao, Xuan-Quy and Le, Ngoc-Bich},
urldate = {2025-01-26},
date = {2023},
note = {Version Number: 3},
keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@misc{hsieh_ruler_2024,
title = {{RULER}: What's the Real Context Size of Your Long-Context Language Models?},
rights = {Creative Commons Attribution 4.0 International},
url = {https://arxiv.org/abs/2404.06654},
doi = {10.48550/ARXIV.2404.06654},
shorttitle = {{RULER}},
abstract = {The needle-in-a-haystack ({NIAH}) test, which examines the ability to retrieve a piece of information (the "needle") from long distractor texts (the "haystack"), has been widely adopted to evaluate long-context language models ({LMs}). However, this simple retrieval-based test is indicative of only a superficial form of long-context understanding. To provide a more comprehensive evaluation of long-context {LMs}, we create a new synthetic benchmark {RULER} with flexible configurations for customized sequence length and task complexity. {RULER} expands upon the vanilla {NIAH} test to encompass variations with diverse types and quantities of needles. Moreover, {RULER} introduces new task categories multi-hop tracing and aggregation to test behaviors beyond searching from context. We evaluate 17 long-context {LMs} with 13 representative tasks in {RULER}. Despite achieving nearly perfect accuracy in the vanilla {NIAH} test, almost all models exhibit large performance drops as the context length increases. While these models all claim context sizes of 32K tokens or greater, only half of them can maintain satisfactory performance at the length of 32K. Our analysis of Yi-34B, which supports context length of 200K, reveals large room for improvement as we increase input length and task complexity. We open source {RULER} to spur comprehensive evaluation of long-context {LMs}.},
publisher = {{arXiv}},
author = {Hsieh, Cheng-Ping and Sun, Simeng and Kriman, Samuel and Acharya, Shantanu and Rekesh, Dima and Jia, Fei and Zhang, Yang and Ginsburg, Boris},
urldate = {2025-01-26},
date = {2024},
note = {Version Number: 3},
keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences},
}
@misc{kojima_large_2022,
title = {Large Language Models are Zero-Shot Reasoners},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/2205.11916},
doi = {10.48550/ARXIV.2205.11916},
abstract = {Pretrained large language models ({LLMs}) are widely used in many sub-fields of natural language processing ({NLP}) and generally known as excellent few-shot learners with task-specific exemplars. Notably, chain of thought ({CoT}) prompting, a recent technique for eliciting complex multi-step reasoning through step-by-step answer examples, achieved the state-of-the-art performances in arithmetics and symbolic reasoning, difficult system-2 tasks that do not follow the standard scaling laws for {LLMs}. While these successes are often attributed to {LLMs}' ability for few-shot learning, we show that {LLMs} are decent zero-shot reasoners by simply adding "Let's think step by step" before each answer. Experimental results demonstrate that our Zero-shot-{CoT}, using the same single prompt template, significantly outperforms zero-shot {LLM} performances on diverse benchmark reasoning tasks including arithmetics ({MultiArith}, {GSM}8K, {AQUA}-{RAT}, {SVAMP}), symbolic reasoning (Last Letter, Coin Flip), and other logical reasoning tasks (Date Understanding, Tracking Shuffled Objects), without any hand-crafted few-shot examples, e.g. increasing the accuracy on {MultiArith} from 17.7\% to 78.7\% and {GSM}8K from 10.4\% to 40.7\% with large {InstructGPT} model (text-davinci-002), as well as similar magnitudes of improvements with another off-the-shelf large model, 540B parameter {PaLM}. The versatility of this single prompt across very diverse reasoning tasks hints at untapped and understudied fundamental zero-shot capabilities of {LLMs}, suggesting high-level, multi-task broad cognitive capabilities may be extracted by simple prompting. We hope our work not only serves as the minimal strongest zero-shot baseline for the challenging reasoning benchmarks, but also highlights the importance of carefully exploring and analyzing the enormous zero-shot knowledge hidden inside {LLMs} before crafting finetuning datasets or few-shot exemplars.},
publisher = {{arXiv}},
author = {Kojima, Takeshi and Gu, Shixiang Shane and Reid, Machel and Matsuo, Yutaka and Iwasawa, Yusuke},
urldate = {2025-01-26},
date = {2022},
note = {Version Number: 4},
keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@inproceedings{liu_generated_2022,
location = {Dublin, Ireland},
title = {Generated Knowledge Prompting for Commonsense Reasoning},
url = {https://aclanthology.org/2022.acl-long.225},
doi = {10.18653/v1/2022.acl-long.225},
eventtitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {3154--3169},
booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
publisher = {Association for Computational Linguistics},
author = {Liu, Jiacheng and Liu, Alisa and Lu, Ximing and Welleck, Sean and West, Peter and Le Bras, Ronan and Choi, Yejin and Hajishirzi, Hannaneh},
urldate = {2025-01-26},
date = {2022},
langid = {english},
file = {Full text:files/676/Liu et al. - 2022 - Generated Knowledge Prompting for Commonsense Reasoning.pdf:application/pdf},
}
@article{makrygiannakis_evidence-based_2024,
title = {Evidence-based potential of generative artificial intelligence large language models in orthodontics: a comparative study of {ChatGPT}, Google Bard, and Microsoft Bing},
rights = {https://creativecommons.org/licenses/by/4.0/},
issn = {0141-5387, 1460-2210},
url = {https://academic.oup.com/ejo/advance-article/doi/10.1093/ejo/cjae017/7645326},
doi = {10.1093/ejo/cjae017},
shorttitle = {Evidence-based potential of generative artificial intelligence large language models in orthodontics},
abstract = {Background: The increasing utilization of large language models ({LLMs}) in Generative Artificial Intelligence across various medical and dental fields, and specifically orthodontics, raises questions about their accuracy. Objective: This study aimed to assess and compare the answers offered by four {LLMs}: Google's Bard, {OpenAI}'s {ChatGPT}-3.5 and {ChatGPT}-4, and Microsoft's Bing, in response to clinically relevant questions within the field of orthodontics. Materials and methods: Ten open-type clinical orthodontics-related questions were posed to the {LLMs}. The responses provided by the {LLMs} were assessed on a scale ranging from 0 (minimum) to 10 (maximum) points, benchmarked against robust scientific evidence, including consensus statements and systematic reviews, using a predefined rubric. After a 4-week interval from the initial evaluation, the answers were reevaluated to gauge intra-evaluator reliability. Statistical comparisons were conducted on the scores using Friedman's and Wilcoxon's tests to identify the model providing the answers with the most comprehensiveness, scientific accuracy, clarity, and relevance. Results: Overall, no statistically significant differences between the scores given by the two evaluators, on both scoring occasions, were detected, so an average score for every {LLM} was computed. The {LLM} answers scoring the highest were those of Microsoft Bing Chat (average score=7.1), followed by {ChatGPT}-4 (average score=4.7), Google Bard (average score=4.6), and finally {ChatGPT}-3.5 (average score=3.8). While Microsoft Bing Chat statistically outperformed {ChatGPT}-3.5 (P-value=0.017) and Google Bard (P-value=0.029), and {ChatGPT}-4 outperformed {ChatGPT}-3.5 (P-value=0.011), all models occasionally produced answers with a lack of comprehensiveness, scientific accuracy, clarity, and relevance. Limitations: The questions asked were indicative and did not cover the entire field of orthodontics. Conclusions: Language models ({LLMs}) show great potential in supporting evidence-based orthodontics. However, their current limitations pose a potential risk of making incorrect healthcare decisions if utilized without careful consideration. Consequently, these tools cannot serve as a substitute for the orthodontist's essential critical thinking and comprehensive subject knowledge. For effective integration into practice, further research, clinical validation, and enhancements to the models are essential. Clinicians must be mindful of the limitations of {LLMs}, as their imprudent utilization could have adverse effects on patient care.},
pages = {cjae017},
journaltitle = {European Journal of Orthodontics},
author = {Makrygiannakis, Miltiadis A and Giannakopoulos, Kostis and Kaklamanos, Eleftherios G},
urldate = {2025-01-26},
date = {2024-04-13},
langid = {english},
file = {Full text:files/678/Makrygiannakis et al. - 2024 - Evidence-based potential of generative artificial intelligence large language models in orthodontics.pdf:application/pdf},
}
@misc{min_rethinking_2022,
title = {Rethinking the Role of Demonstrations: What Makes In-Context Learning Work?},
rights = {Creative Commons Attribution 4.0 International},
url = {https://arxiv.org/abs/2202.12837},
doi = {10.48550/ARXIV.2202.12837},
shorttitle = {Rethinking the Role of Demonstrations},
abstract = {Large language models ({LMs}) are able to in-context learn -- perform a new task via inference alone by conditioning on a few input-label pairs (demonstrations) and making predictions for new inputs. However, there has been little understanding of how the model learns and which aspects of the demonstrations contribute to end task performance. In this paper, we show that ground truth demonstrations are in fact not required -- randomly replacing labels in the demonstrations barely hurts performance on a range of classification and multi-choice tasks, consistently over 12 different models including {GPT}-3. Instead, we find that other aspects of the demonstrations are the key drivers of end task performance, including the fact that they provide a few examples of (1) the label space, (2) the distribution of the input text, and (3) the overall format of the sequence. Together, our analysis provides a new way of understanding how and why in-context learning works, while opening up new questions about how much can be learned from large language models through inference alone.},
publisher = {{arXiv}},
author = {Min, Sewon and Lyu, Xinxi and Holtzman, Ari and Artetxe, Mikel and Lewis, Mike and Hajishirzi, Hannaneh and Zettlemoyer, Luke},
urldate = {2025-01-26},
date = {2022},
note = {Version Number: 2},
keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences},
}
@article{pimbblet_can_2025,
title = {Can {ChatGPT} pass a physics degree? Making a case for reformation of assessment of undergraduate degrees},
volume = {46},
issn = {0143-0807, 1361-6404},
url = {https://iopscience.iop.org/article/10.1088/1361-6404/ad9874},
doi = {10.1088/1361-6404/ad9874},
shorttitle = {Can {ChatGPT} pass a physics degree?},
abstract = {The emergence of conversational natural language processing models presents a significant challenge for Higher Education. In this work, we use the entirety of a {UK} Physics undergraduate ({BSc} with Honours) degree including all examinations and coursework to test if {ChatGPT} ({GPT}-4) can pass a degree. We adopt a maximal cheating approach wherein we permit ourselves to modify questions for clarity, split questions up into smaller sub-components, expand on answers given—especially for long form written responses, obtain references, and use advanced coaching, plug-ins and custom instructions to optimize outputs. In general, there are only certain parts of the degree in question where {GPT}-4 fails. Explicitly these include compulsory laboratory elements, and the final project which is assessed by a viva. If these were no issue, then {GPT}-4 would pass with a grade of an upper second class overall. In general, coding tasks are performed exceptionally well, along with simple single-step solution problems. Multiple step problems and longer prose are generally poorer along with interdisciplinary problems. We strongly suggest that there is now a necessity to urgently re-think and revise assessment practice in physics—and other disciplines—due to the existence of {AI} such as {GPT}-4. We recommend close scrutiny of assessment tasks: only invigilated in-person examinations, vivas, laboratory skills testing (or performances in other disciplines), and presentations are not vulnerable to {GPT}-4, and urge consideration of how {AI} can be embedded within the disciplinary context.},
pages = {015702},
number = {1},
journaltitle = {European Journal of Physics},
shortjournal = {Eur. J. Phys.},
author = {Pimbblet, K A and Morrell, L J},
urldate = {2025-01-26},
date = {2025-01-31},
}
@article{stribling_model_2024,
title = {The model student: {GPT}-4 performance on graduate biomedical science exams},
volume = {14},
issn = {2045-2322},
url = {https://www.nature.com/articles/s41598-024-55568-7},
doi = {10.1038/s41598-024-55568-7},
shorttitle = {The model student},
abstract = {The {GPT}-4 large language model ({LLM}) and {ChatGPT} chatbot have emerged as accessible and capable tools for generating English-language text in a variety of formats. {GPT}-4 has previously performed well when applied to questions from multiple standardized examinations. However, further evaluation of trustworthiness and accuracy of {GPT}-4 responses across various knowledge domains is essential before its use as a reference resource. Here, we assess {GPT}-4 performance on nine graduate-level examinations in the biomedical sciences (seven blinded), finding that {GPT}-4 scores exceed the student average in seven of nine cases and exceed all student scores for four exams. {GPT}-4 performed very well on fill-in-the-blank, short-answer, and essay questions, and correctly answered several questions on figures sourced from published manuscripts. Conversely, {GPT}-4 performed poorly on questions with figures containing simulated data and those requiring a hand-drawn answer. Two {GPT}-4 answer-sets were flagged as plagiarism based on answer similarity and some model responses included detailed hallucinations. In addition to assessing {GPT}-4 performance, we discuss patterns and limitations in {GPT}-4 capabilities with the goal of informing design of future academic examinations in the chatbot era.},
pages = {5670},
number = {1},
journaltitle = {Scientific Reports},
shortjournal = {Sci Rep},
author = {Stribling, Daniel and Xia, Yuxing and Amer, Maha K. and Graim, Kiley S. and Mulligan, Connie J. and Renne, Rolf},
urldate = {2025-01-26},
date = {2024-03-07},
langid = {english},
file = {Full text:files/683/Stribling et al. - 2024 - The model student GPT-4 performance on graduate biomedical science exams.pdf:application/pdf},
}
@misc{vaswani_attention_2017,
title = {Attention Is All You Need},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/1706.03762},
doi = {10.48550/ARXIV.1706.03762},
abstract = {The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 {BLEU} on the {WMT} 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 {BLEU}. On the {WMT} 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art {BLEU} score of 41.8 after training for 3.5 days on eight {GPUs}, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.},
publisher = {{arXiv}},
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Lukasz and Polosukhin, Illia},
urldate = {2025-01-26},
date = {2017},
note = {Version Number: 7},
keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@misc{wang_searching_2024,
title = {Searching for Best Practices in Retrieval-Augmented Generation},
rights = {Creative Commons Attribution 4.0 International},
url = {https://arxiv.org/abs/2407.01219},
doi = {10.48550/ARXIV.2407.01219},
abstract = {Retrieval-augmented generation ({RAG}) techniques have proven to be effective in integrating up-to-date information, mitigating hallucinations, and enhancing response quality, particularly in specialized domains. While many {RAG} approaches have been proposed to enhance large language models through query-dependent retrievals, these approaches still suffer from their complex implementation and prolonged response times. Typically, a {RAG} workflow involves multiple processing steps, each of which can be executed in various ways. Here, we investigate existing {RAG} approaches and their potential combinations to identify optimal {RAG} practices. Through extensive experiments, we suggest several strategies for deploying {RAG} that balance both performance and efficiency. Moreover, we demonstrate that multimodal retrieval techniques can significantly enhance question-answering capabilities about visual inputs and accelerate the generation of multimodal content using a "retrieval as generation" strategy.},
publisher = {{arXiv}},
author = {Wang, Xiaohua and Wang, Zhenghua and Gao, Xuan and Zhang, Feiran and Wu, Yixin and Xu, Zhibo and Shi, Tianyuan and Wang, Zhengyuan and Li, Shizheng and Qian, Qi and Yin, Ruicheng and Lv, Changze and Zheng, Xiaoqing and Huang, Xuanjing},
urldate = {2025-01-26},
date = {2024},
note = {Version Number: 1},
keywords = {Computation and Language (cs.{CL}), {FOS}: Computer and information sciences},
}
@misc{wei_chain--thought_2022,
title = {Chain-of-Thought Prompting Elicits Reasoning in Large Language Models},
rights = {Creative Commons Attribution 4.0 International},
url = {https://arxiv.org/abs/2201.11903},
doi = {10.48550/ARXIV.2201.11903},
abstract = {We explore how generating a chain of thought -- a series of intermediate reasoning steps -- significantly improves the ability of large language models to perform complex reasoning. In particular, we show how such reasoning abilities emerge naturally in sufficiently large language models via a simple method called chain of thought prompting, where a few chain of thought demonstrations are provided as exemplars in prompting. Experiments on three large language models show that chain of thought prompting improves performance on a range of arithmetic, commonsense, and symbolic reasoning tasks. The empirical gains can be striking. For instance, prompting a 540B-parameter language model with just eight chain of thought exemplars achieves state of the art accuracy on the {GSM}8K benchmark of math word problems, surpassing even finetuned {GPT}-3 with a verifier.},
publisher = {{arXiv}},
author = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Ichter, Brian and Xia, Fei and Chi, Ed and Le, Quoc and Zhou, Denny},
urldate = {2025-01-26},
date = {2022},
note = {Version Number: 6},
keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences},
}
@article{yu_reflection_2023,
title = {Reflection on whether Chat {GPT} should be banned by academia from the perspective of education and teaching},
volume = {14},
issn = {1664-1078},
url = {https://www.frontiersin.org/articles/10.3389/fpsyg.2023.1181712/full},
doi = {10.3389/fpsyg.2023.1181712},
pages = {1181712},
journaltitle = {Frontiers in Psychology},
shortjournal = {Front. Psychol.},
author = {Yu, Hao},
urldate = {2025-01-26},
date = {2023-06-01},
file = {Full text:files/689/Yu - 2023 - Reflection on whether Chat GPT should be banned by academia from the perspective of education and te.pdf:application/pdf},
}
@misc{shi_detecting_2023,
title = {Detecting Pretraining Data from Large Language Models},
rights = {Creative Commons Attribution 4.0 International},
url = {https://arxiv.org/abs/2310.16789},
doi = {10.48550/ARXIV.2310.16789},
abstract = {Although large language models ({LLMs}) are widely deployed, the data used to train them is rarely disclosed. Given the incredible scale of this data, up to trillions of tokens, it is all but certain that it includes potentially problematic text such as copyrighted materials, personally identifiable information, and test data for widely reported reference benchmarks. However, we currently have no way to know which data of these types is included or in what proportions. In this paper, we study the pretraining data detection problem: given a piece of text and black-box access to an {LLM} without knowing the pretraining data, can we determine if the model was trained on the provided text? To facilitate this study, we introduce a dynamic benchmark {WIKIMIA} that uses data created before and after model training to support gold truth detection. We also introduce a new detection method Min-K\% Prob based on a simple hypothesis: an unseen example is likely to contain a few outlier words with low probabilities under the {LLM}, while a seen example is less likely to have words with such low probabilities. Min-K\% Prob can be applied without any knowledge about the pretraining corpus or any additional training, departing from previous detection methods that require training a reference model on data that is similar to the pretraining data. Moreover, our experiments demonstrate that Min-K\% Prob achieves a 7.4\% improvement on {WIKIMIA} over these previous methods. We apply Min-K\% Prob to three real-world scenarios, copyrighted book detection, contaminated downstream example detection and privacy auditing of machine unlearning, and find it a consistently effective solution.},
publisher = {{arXiv}},
author = {Shi, Weijia and Ajith, Anirudh and Xia, Mengzhou and Huang, Yangsibo and Liu, Daogao and Blevins, Terra and Chen, Danqi and Zettlemoyer, Luke},
urldate = {2025-01-26},
date = {2023},
note = {Version Number: 3},
keywords = {Computation and Language (cs.{CL}), Cryptography and Security (cs.{CR}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@article{fernandes_ai_2024,
title = {{AI} Training and Copyright: Should Intellectual Property Law Allow Machines to Learn?},
volume = {10},
rights = {http://creativecommons.org/licenses/by/4.0},
issn = {2653-8660},
url = {https://ejournals.epublishing.ekt.gr/index.php/bioethica/article/view/39041},
doi = {10.12681/bioeth.39041},
shorttitle = {{AI} Training and Copyright},
abstract = {This article examines the intricate legal landscape surrounding the use of copyrighted materials in the development of artificial intelligence ({AI}). It explores the rise of {AI} and its reliance on data, emphasizing the importance of data availability for machine learning ({ML}) systems. The article analyzes current relevant legislation across the European Union, United States, and Japan, highlighting the legal ambiguities and constraints posed by {IP} rights, particularly copyright. It discusses possible new solutions, referencing the World Intellectual Property Organization's ({WIPO}) call for discussions on {AI} and {IP} policy. The conclusion stresses the need to balance the interests of {AI} developers and {IP} rights holders to promote technological advancement while safeguarding creativity and originality.},
pages = {8--21},
number = {2},
journaltitle = {Bioethica},
shortjournal = {Bioethica},
author = {Fernandes, Pedro Martins},
urldate = {2025-01-26},
date = {2024-10-01},
file = {Full Text PDF:files/692/Fernandes - 2024 - AI Training and Copyright Should Intellectual Property Law Allow Machines to Learn.pdf:application/pdf},
}
@article{buick_copyright_2024,
title = {Copyright and {AI} training data—transparency to the rescue?},
rights = {https://creativecommons.org/licenses/by-nc-nd/4.0/},
issn = {1747-1532, 1747-1540},
url = {https://academic.oup.com/jiplp/advance-article/doi/10.1093/jiplp/jpae102/7922541},
doi = {10.1093/jiplp/jpae102},
abstract = {Generative Artificial Intelligence ({AI}) models must be trained on vast quantities of data, much of which is composed of copyrighted material. However, {AI} developers frequently use such content without seeking permission from rightsholders, leading to calls for requirements to disclose information on the contents of {AI} training data. These demands have won an early success through the inclusion of such requirements in the {EU}'s {AI} Act. This article argues that such transparency requirements alone cannot rescue us from the difficult question of how best to respond to the fundamental challenges generative {AI} poses to copyright law. This is because the impact of transparency requirements is contingent on existing copyright laws; if these do not adequately address the challenges presented by generative {AI}, transparency will not provide a solution. This is exemplified by the transparency requirements of the {AI} Act, which are explicitly designed to facilitate the enforcement of the right to opt-out of text and data mining under the Copyright in the Digital Single Market Directive. Because the transparency requirements do not sufficiently address the underlying flaws of this opt-out, they are unlikely to provide any meaningful improvement to the position of individual rightsholders. Transparency requirements are thus a necessary but not sufficient measure to achieve a fair and equitable balance between innovation and protection for rightsholders. Policymakers must therefore look beyond such requirements and consider further action to address the complex challenge presented to copyright law by generative {AI}.},
pages = {jpae102},
journaltitle = {Journal Of Intellectual Property Law and Practice},
author = {Buick, Adam},
urldate = {2025-01-26},
date = {2024-12-12},
langid = {english},
file = {Full Text PDF:files/694/Buick - 2024 - Copyright and AI training data—transparency to the rescue.pdf:application/pdf},
}
@misc{azizy_adversarial_2024,
title = {Adversarial vulnerability following {LLM} low-resource language fine-tuning: Short report},
rights = {https://creativecommons.org/licenses/by/4.0/legalcode},
url = {https://osf.io/bzd6w},
doi = {10.31219/osf.io/bzd6w},
shorttitle = {Adversarial vulnerability following {LLM} low-resource language fine-tuning},
abstract = {We briefly report how fine-tuning a multilingual {LLM} with a low-resource language resulted in an increased vulnerability to adversarial attacks. We fine-tuned {GPT}-3.5 Turbo (gpt-3.5-0125) with 560 input-output pairs ({\textasciitilde}274k tokens) of Krama Javanese, a high register of Javanese (a low-resource language). We report brief qualitative and quantitative observations that 1. The fine-tuned model is more compliant towards adversarial prompts, 2. Unsuccessful prompts can be successful when concatenated with an elaboration string, e.g., step-by-step prompting or by specifying details, 3. The model can be prompted in the fine-tuned language to respond in English, thus providing a way to produce harmful responses in a different language. The fine-tuned model sees a 45.1\% increase of {GPT}-4-rated sum of harmfulness for Krama Javanese responses and a 13.8\% increase for English responses. Notably, all of these vulnerabilities can be reached very effectively with the benign nature and our small dataset size. Our work contributes knowledge in the intersection of {AI} safety and multilingual models, indicating that fine-tuning an {LLM} on a low-resource language should include additional data examples for retaining safety guardrails.},
publisher = {Open Science Framework},
author = {Azizy, Afrizal Hasbi and Cahyanto, Nuel Bagus},
urldate = {2025-01-26},
date = {2024-05-03},
file = {Versione inviata:files/696/Azizy e Cahyanto - 2024 - Adversarial vulnerability following LLM low-resource language fine-tuning Short report.pdf:application/pdf},
}
@article{noauthor_adversarial_2024,
title = {Adversarial Attacks on Large Language Models ({LLMs}) in Cybersecurity Applications: Detection, Mitigation, and Resilience Enhancement},
issn = {25825208},
url = {https://www.irjmets.com/uploadedfiles/paper//issue_10_october_2024/61937/final/fin_irjmets1727942590.pdf},
doi = {10.56726/IRJMETS61937},
shorttitle = {Adversarial Attacks on Large Language Models ({LLMs}) in Cybersecurity Applications},
journaltitle = {International Research Journal of Modernization in Engineering Technology and Science},
shortjournal = {{IRJMETS}},
urldate = {2025-01-26},
date = {2024-10-08},
file = {Full text:files/698/2024 - Adversarial Attacks on Large Language Models (LLMs) in Cybersecurity Applications Detection, Mitiga.pdf:application/pdf},
}
@incollection{kucharavy_exploring_2024,
location = {Cham},
title = {Exploring the Dual Role of {LLMs} in Cybersecurity: Threats and Defenses},
isbn = {978-3-031-54826-0 978-3-031-54827-7},
url = {https://link.springer.com/10.1007/978-3-031-54827-7_26},
shorttitle = {Exploring the Dual Role of {LLMs} in Cybersecurity},
abstract = {Large Language Models ({LLMs}) pose risks for cybersecurity since they facilitate minimal cost creation of malware, phishing messages, and malicious chatbots. At the same time, {LLMs} can help defend against cyberattacks. This chapter reviews security research around the risks and benefits of {LLMs}.},
pages = {235--242},
booktitle = {Large Language Models in Cybersecurity},
publisher = {Springer Nature Switzerland},
author = {Bryce, Ciarán and Kalousis, Alexandros and Leroux, Ilan and Madinier, Hélène and Pasche, Thomas and Ruch, Patrick},
editor = {Kucharavy, Andrei and Plancherel, Octave and Mulder, Valentin and Mermoud, Alain and Lenders, Vincent},
urldate = {2025-01-26},
date = {2024},
langid = {english},
doi = {10.1007/978-3-031-54827-7_26},
file = {Full text:files/700/Bryce et al. - 2024 - Exploring the Dual Role of LLMs in Cybersecurity Threats and Defenses.pdf:application/pdf},
}
@article{gupta_chatgpt_2023,
title = {From {ChatGPT} to {ThreatGPT}: Impact of Generative {AI} in Cybersecurity and Privacy},
volume = {11},
rights = {https://creativecommons.org/licenses/by-nc-nd/4.0/},
issn = {2169-3536},
url = {https://ieeexplore.ieee.org/document/10198233/},
doi = {10.1109/ACCESS.2023.3300381},
shorttitle = {From {ChatGPT} to {ThreatGPT}},
pages = {80218--80245},
journaltitle = {{IEEE} Access},
shortjournal = {{IEEE} Access},
author = {Gupta, Maanak and Akiri, Charankumar and Aryal, Kshitiz and Parker, Eli and Praharaj, Lopamudra},
urldate = {2025-01-26},
date = {2023},
file = {Versione inviata:files/702/Gupta et al. - 2023 - From ChatGPT to ThreatGPT Impact of Generative AI in Cybersecurity and Privacy.pdf:application/pdf},
}
@misc{mahato_red_2024,
title = {Red Teaming for Multimodal Large Language Models: A Survey},
rights = {https://creativecommons.org/licenses/by/4.0/},
url = {https://www.techrxiv.org/users/717031/articles/701792-red-teaming-for-multimodal-large-language-models-a-survey?commit=78974318f47d2573ffe2d51622dee3c7268dddd5},
doi = {10.36227/techrxiv.170629758.87975697/v1},
shorttitle = {Red Teaming for Multimodal Large Language Models},
publisher = {Preprints},
author = {Mahato, Moushumi and Kumar, Avinash and Singh, Kartikey and Kukreja, Bhavesh and Nabi, Javaid},
urldate = {2025-01-26},
date = {2024-01-26},
file = {Versione inviata:files/704/Mahato et al. - 2024 - Red Teaming for Multimodal Large Language Models A Survey.pdf:application/pdf},
}
@article{menz_health_2024,
title = {Health Disinformation Use Case Highlighting the Urgent Need for Artificial Intelligence Vigilance: Weapons of Mass Disinformation},
volume = {184},
issn = {2168-6106},
url = {https://jamanetwork.com/journals/jamainternalmedicine/fullarticle/2811333},
doi = {10.1001/jamainternmed.2023.5947},
shorttitle = {Health Disinformation Use Case Highlighting the Urgent Need for Artificial Intelligence Vigilance},
abstract = {Importance: Although artificial intelligence ({AI}) offers many promises across modern medicine, it may carry a significant risk for the mass generation of targeted health disinformation. This poses an urgent threat toward public health initiatives and calls for rapid attention by health care professionals, {AI} developers, and regulators to ensure public safety. Observations: As an example, using a single publicly available large-language model, within 65 minutes, 102 distinct blog articles were generated that contained more than 17000 words of disinformation related to vaccines and vaping. Each post was coercive and targeted at diverse societal groups, including young adults, young parents, older persons, pregnant people, and those with chronic health conditions. The blogs included fake patient and clinician testimonials and obeyed prompting for the inclusion of scientific-looking referencing. Additional generative {AI} tools created an accompanying 20 realistic images in less than 2 minutes. This process was undertaken by health care professionals and researchers with no specialized knowledge in bypassing {AI} guardrails, relying solely on publicly available information. Conclusions and Relevance: These observations demonstrate that when the guardrails of {AI} tools are insufficient, the ability to rapidly generate diverse and large amounts of convincing disinformation is profound. Beyond providing 2 example scenarios, these findings demonstrate an urgent need for robust {AI} vigilance. The {AI} tools are rapidly progressing; alongside these advancements, emergent risks are becoming increasingly apparent. Key pillars of pharmacovigilance—including transparency, surveillance, and regulation—may serve as valuable examples for managing these risks and safeguarding public health.},
pages = {92},
number = {1},
journaltitle = {{JAMA} Internal Medicine},
shortjournal = {{JAMA} Intern Med},
author = {Menz, Bradley D. and Modi, Natansh D. and Sorich, Michael J. and Hopkins, Ashley M.},
urldate = {2025-01-26},
date = {2024-01-01},
langid = {english},
}
@article{qi_visual_2024,
title = {Visual Adversarial Examples Jailbreak Aligned Large Language Models},
volume = {38},
issn = {2374-3468, 2159-5399},
url = {https://ojs.aaai.org/index.php/AAAI/article/view/30150},
doi = {10.1609/aaai.v38i19.30150},
abstract = {Warning: this paper contains data, prompts, and model outputs that are offensive in nature.
Recently, there has been a surge of interest in integrating vision into Large Language Models ({LLMs}), exemplified by Visual Language Models ({VLMs}) such as Flamingo and {GPT}-4. This paper sheds light on the security and safety implications of this trend. First, we underscore that the continuous and high-dimensional nature of the visual input makes it a weak link against adversarial attacks, representing an expanded attack surface of vision-integrated {LLMs}. Second, we highlight that the versatility of {LLMs} also presents visual attackers with a wider array of achievable adversarial objectives, extending the implications of security failures beyond mere misclassification. As an illustration, we present a case study in which we exploit visual adversarial examples to circumvent the safety guardrail of aligned {LLMs} with integrated vision. Intriguingly, we discover that a single visual adversarial example can universally jailbreak an aligned {LLM}, compelling it to heed a wide range of harmful instructions (that it otherwise would not) and generate harmful content that transcends the narrow scope of a `few-shot' derogatory corpus initially employed to optimize the adversarial example. Our study underscores the escalating adversarial risks associated with the pursuit of multimodality. Our findings also connect the long-studied adversarial vulnerabilities of neural networks to the nascent field of {AI} alignment. The presented attack suggests a fundamental adversarial challenge for {AI} alignment, especially in light of the emerging trend toward multimodality in frontier foundation models.},
pages = {21527--21536},
number = {19},
journaltitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
shortjournal = {{AAAI}},
author = {Qi, Xiangyu and Huang, Kaixuan and Panda, Ashwinee and Henderson, Peter and Wang, Mengdi and Mittal, Prateek},
urldate = {2025-01-26},
date = {2024-03-24},
file = {Full text:files/707/Qi et al. - 2024 - Visual Adversarial Examples Jailbreak Aligned Large Language Models.pdf:application/pdf},
}
@inproceedings{roy_probing_2023,
location = {Singapore},
title = {Probing {LLMs} for hate speech detection: strengths and vulnerabilities},
url = {https://aclanthology.org/2023.findings-emnlp.407},
doi = {10.18653/v1/2023.findings-emnlp.407},
shorttitle = {Probing {LLMs} for hate speech detection},
eventtitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2023},
pages = {6116--6128},
booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2023},
publisher = {Association for Computational Linguistics},
author = {Roy, Sarthak and Harshvardhan, Ashish and Mukherjee, Animesh and Saha, Punyajoy},
urldate = {2025-01-26},
date = {2023},
langid = {english},
file = {Full text:files/709/Roy et al. - 2023 - Probing LLMs for hate speech detection strengths and vulnerabilities.pdf:application/pdf},
}
@article{takemoto_all_2024,
title = {All in How You Ask for It: Simple Black-Box Method for Jailbreak Attacks},
volume = {14},
rights = {https://creativecommons.org/licenses/by/4.0/},
issn = {2076-3417},
url = {https://www.mdpi.com/2076-3417/14/9/3558},
doi = {10.3390/app14093558},
shorttitle = {All in How You Ask for It},
abstract = {Large Language Models ({LLMs}), such as {ChatGPT}, encounter jailbreak challenges, wherein safeguards are circumvented to generate ethically harmful prompts. This study introduces a straightforward black-box method for efficiently crafting jailbreak prompts that bypass {LLM} defenses. Our technique iteratively transforms harmful prompts into benign expressions directly utilizing the target {LLM}, predicated on the hypothesis that {LLMs} can autonomously generate expressions that evade safeguards. Through experiments conducted with {ChatGPT} ({GPT}-3.5 and {GPT}-4) and Gemini-Pro, our method consistently achieved an attack success rate exceeding 80\% within an average of five iterations for forbidden questions and proved to be robust against model updates. The jailbreak prompts generated were not only naturally worded and succinct, but also challenging to defend against. These findings suggest that the creation of effective jailbreak prompts is less complex than previously believed, underscoring the heightened risk posed by black-box jailbreak attacks.},
pages = {3558},
number = {9},
journaltitle = {Applied Sciences},
shortjournal = {Applied Sciences},
author = {Takemoto, Kazuhiro},
urldate = {2025-01-26},
date = {2024-04-23},
langid = {english},
file = {Full text:files/711/Takemoto - 2024 - All in How You Ask for It Simple Black-Box Method for Jailbreak Attacks.pdf:application/pdf},
}
@misc{urman_silence_2023,
title = {The Silence of the {LLMs}: Cross-Lingual Analysis of Political Bias and False Information Prevalence in {ChatGPT}, Google Bard, and Bing Chat},
rights = {https://creativecommons.org/licenses/by/4.0/legalcode},
url = {https://osf.io/q9v8f},
doi = {10.31219/osf.io/q9v8f},
shorttitle = {The Silence of the {LLMs}},
abstract = {This article presents a comparative analysis of political bias in the outputs of three Large Language Model ({LLM})-based chatbots - {ChatGPT}, Bing Chat, and Bard - in response to political queries concerning the authoritarian regime in Russia. We investigate whether safeguards implemented in these chatbots contribute to the censorship of information that is viewed as harmful by the regime, in particular information about Vladimir Putin and the Russian war against Ukraine, and whether these safeguards enable the generation of false claims, in particular in relation to the regime's internal and external opponents. To detect whether {LLM} safeguards reiterate political bias, the article compares the outputs of prompts focusing on Putin's regime and the ones dealing with the Russian opposition and the {US} and Ukrainian politicians. It also examines whether the degree of bias varies depending on the language of the prompt and compares outputs concerning political personalities and issues across three languages: Russian, Ukrainian, and English. The results reveal significant disparities in how individual chatbots withhold politics-related information or produce false claims in relation to it. Notably, Bard consistently refused to respond to queries about Vladimir Putin in Russian, even when the relevant information was accessible via Google Search, and generally followed the censorship guidelines that, according to Yandex-related data leaks, were issued by the Russian authorities. In terms of false claims, we find substantial variation across languages with Ukrainian and Russian prompts generating false information more often and Bard being more prone to produce false claims in relation to Russian regime opponents (e.g., Navalny or Zelenskyy) than other chatbots. This research aims to stimulate further dialogue and research on developing safeguards against the misuse of {LLMs} outside of democratic environments.},
publisher = {Open Science Framework},
author = {Urman, Aleksandra and Makhortykh, Mykola},
urldate = {2025-01-26},
date = {2023-09-08},
file = {Versione inviata:files/713/Urman e Makhortykh - 2023 - The Silence of the LLMs Cross-Lingual Analysis of Political Bias and False Information Prevalence i.pdf:application/pdf},
}
@article{wu_harnessing_2024,
title = {Harnessing Response Consistency for Superior {LLM} Performance: The Promise and Peril of Answer-Augmented Prompting},
volume = {13},
rights = {https://creativecommons.org/licenses/by/4.0/},
issn = {2079-9292},
url = {https://www.mdpi.com/2079-9292/13/23/4581},
doi = {10.3390/electronics13234581},
shorttitle = {Harnessing Response Consistency for Superior {LLM} Performance},
abstract = {This paper introduces Answer-Augmented Prompting ({AAP}), an innovative approach that leverages the Response Consistency of History of Dialogue ({HoD}) phenomenon in Large Language Models ({LLMs}). {AAP} not only achieves significantly superior performance enhancements compared to traditional augmentation methods but also exhibits a stronger potential for “jailbreaking”, allowing models to produce unsafe or misleading responses. By strategically modifying the {HoD}, {AAP} influences {LLM} performance in a dual manner: it promotes accuracy while amplifying risks associated with bypassing built-in safeguards. Our experiments demonstrate that {AAP} outperforms standard methods in both effectiveness and the ability to elicit harmful content. To address these risks, we propose comprehensive mitigation strategies for both {LLM} service providers and end-users. This research offers valuable insights into the implications of Response Consistency in {LLMs}, underscoring the promise and peril of this powerful capability.},
pages = {4581},
number = {23},
journaltitle = {Electronics},
shortjournal = {Electronics},
author = {Wu, Hua and Hong, Haotian and Sun, Li and Bai, Xiaojing and Pu, Mengyang},
urldate = {2025-01-26},
date = {2024-11-21},
langid = {english},
file = {Full text:files/715/Wu et al. - 2024 - Harnessing Response Consistency for Superior LLM Performance The Promise and Peril of Answer-Augmen.pdf:application/pdf},
}
@inproceedings{yang_censorship_2021,
location = {Virtual Event Canada},
title = {Censorship of Online Encyclopedias: Implications for {NLP} Models},
isbn = {978-1-4503-8309-7},
url = {https://dl.acm.org/doi/10.1145/3442188.3445916},
doi = {10.1145/3442188.3445916},
shorttitle = {Censorship of Online Encyclopedias},
eventtitle = {{FAccT} '21: 2021 {ACM} Conference on Fairness, Accountability, and Transparency},
pages = {537--548},
booktitle = {Proceedings of the 2021 {ACM} Conference on Fairness, Accountability, and Transparency},
publisher = {{ACM}},
author = {Yang, Eddie and Roberts, Margaret E.},
urldate = {2025-01-26},
date = {2021-03-03},
langid = {english},
file = {Full text:files/717/Yang e Roberts - 2021 - Censorship of Online Encyclopedias Implications for NLP Models.pdf:application/pdf},
}
@misc{lin_malla_2024,
title = {Malla: Demystifying Real-world Large Language Model Integrated Malicious Services},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/2401.03315},
doi = {10.48550/ARXIV.2401.03315},
shorttitle = {Malla},
abstract = {The underground exploitation of large language models ({LLMs}) for malicious services (i.e., Malla) is witnessing an uptick, amplifying the cyber threat landscape and posing questions about the trustworthiness of {LLM} technologies. However, there has been little effort to understand this new cybercrime, in terms of its magnitude, impact, and techniques. In this paper, we conduct the first systematic study on 212 real-world Mallas, uncovering their proliferation in underground marketplaces and exposing their operational modalities. Our study discloses the Malla ecosystem, revealing its significant growth and impact on today's public {LLM} services. Through examining 212 Mallas, we uncovered eight backend {LLMs} used by Mallas, along with 182 prompts that circumvent the protective measures of public {LLM} {APIs}. We further demystify the tactics employed by Mallas, including the abuse of uncensored {LLMs} and the exploitation of public {LLM} {APIs} through jailbreak prompts. Our findings enable a better understanding of the real-world exploitation of {LLMs} by cybercriminals, offering insights into strategies to counteract this cybercrime.},
publisher = {{arXiv}},
author = {Lin, Zilong and Cui, Jian and Liao, Xiaojing and Wang, {XiaoFeng}},
urldate = {2025-01-26},
date = {2024},
note = {Version Number: 3},
keywords = {Artificial Intelligence (cs.{AI}), Cryptography and Security (cs.{CR}), {FOS}: Computer and information sciences},
}
@article{ayana_decolonizing_2024,
title = {Decolonizing global {AI} governance: assessment of the state of decolonized {AI} governance in Sub-Saharan Africa},
volume = {11},
issn = {2054-5703},
url = {https://royalsocietypublishing.org/doi/10.1098/rsos.231994},
doi = {10.1098/rsos.231994},
shorttitle = {Decolonizing global {AI} governance},
abstract = {Global artificial intelligence ({AI}) governance must prioritize equity, embrace a decolonial mindset, and provide the Global South countries the authority to spearhead solution creation. Decolonization is crucial for dismantling Western-centric cognitive frameworks and mitigating biases. Integrating a decolonial approach to {AI} governance involves recognizing persistent colonial repercussions, leading to biases in {AI} solutions and disparities in {AI} access based on gender, race, geography, income and societal factors. This paradigm shift necessitates deliberate efforts to deconstruct imperial structures governing knowledge production, perpetuating global unequal resource access and biases. This research evaluates Sub-Saharan African progress in {AI} governance decolonization, focusing on indicators like {AI} governance institutions, national strategies, sovereignty prioritization, data protection regulations, and adherence to local data usage requirements. Results show limited progress, with only Rwanda notably responsive to decolonization among the ten countries evaluated; 80\% are decolonization-aware, and one is decolonization-blind. The paper provides a detailed analysis of each nation, offering recommendations for fostering decolonization, including stakeholder involvement, addressing inequalities, promoting ethical {AI}, supporting local innovation, building regional partnerships, capacity building, public awareness, and inclusive governance. This paper contributes to elucidating the challenges and opportunities associated with decolonization in {SSA} countries, thereby enriching the ongoing discourse on global {AI} governance.},
pages = {231994},
number = {8},
journaltitle = {Royal Society Open Science},
shortjournal = {R. Soc. Open Sci.},
author = {Ayana, Gelan and Dese, Kokeb and Daba Nemomssa, Hundessa and Habtamu, Bontu and Mellado, Bruce and Badu, Kingsley and Yamba, Edmund and Faye, Sylvain Landry and Ondua, Moise and Nsagha, Dickson and Nkweteyim, Denis and Kong, Jude Dzevela},
urldate = {2025-01-26},
date = {2024-08},
langid = {english},
}
@misc{capraro_impact_2023,
title = {The impact of generative artificial intelligence on socioeconomic inequalities and policy making},
rights = {https://creativecommons.org/licenses/by/4.0/legalcode},
url = {https://osf.io/6fd2y},
doi = {10.31234/osf.io/6fd2y},
	abstract = {Generative artificial intelligence has the potential to both exacerbate and ameliorate existing socioeconomic inequalities. In this article, we provide a state-of-the-art interdisciplinary overview of the potential impacts of generative {AI} on (mis)information and three information-intensive domains: work, education, and healthcare. Our goal is to highlight how generative {AI} could worsen existing inequalities while illuminating how {AI} may help mitigate pervasive social problems. In the information domain, generative {AI} can democratize content creation and access, but may dramatically expand the production and proliferation of misinformation. In the workplace, it can boost productivity and create new jobs, but the benefits will likely be distributed unevenly. In education, it offers personalized learning, but may widen the digital divide. In healthcare, it might improve diagnostics and accessibility, but could deepen pre-existing inequalities. In each section we cover a specific topic, evaluate existing research, identify critical gaps, and recommend research directions, including explicit trade-offs that complicate the derivation of a priori hypotheses. We conclude with a section highlighting the role of policymaking to maximize generative {AI}'s potential to reduce inequalities while mitigating its harmful effects. We discuss strengths and weaknesses of existing policy frameworks in the European Union, the United States, and the United Kingdom, observing that each fails to fully confront the socioeconomic challenges we have identified. We propose several concrete policies that could promote shared prosperity through the advancement of generative {AI}. This article emphasizes the need for interdisciplinary collaborations to understand and address the complex challenges of generative {AI}.},
publisher = {{PsyArXiv}},
author = {Capraro, Valerio and Lentsch, Austin and Acemoglu, Daron and Akgun, Selin and Akhmedova, Aisel and Bilancini, Ennio and Bonnefon, Jean-François and Branas-Garza, Pablo and Butera, Luigi and Douglas, Karen and Everett, Jim Albert Charlton and Gigerenzer, Gerd and Greenhow, Christine and Hashimoto, Daniel and Holt-Lunstad, Julianne and Jetten, Jolanda and Johnson, Simon and Kunz, Werner and Longoni, Chiara and Lunn, Peter D and Natale, Simone and Paluch, Stefanie and Rahwan, Iyad and Selwyn, Neil and Singh, Vivek and Suri, Siddharth and Sutcliffe, Jennifer and Tomlinson, Joe and Linden, Sander Van Der and Van Lange, Paul and Wall, Friederike and Van Bavel, Jay Joseph and Viale, Riccardo},
urldate = {2025-01-26},
date = {2023-12-16},
file = {Full text:files/722/Capraro et al. - 2023 - The impact of generative artificial intelligence on socioeconomic inequalities and policy making.pdf:application/pdf},
}
@article{onyebuchi_nneamaka_chisom_review_2024,
	title = {Review of {AI} in Education: Transforming Learning Environments in Africa},
volume = {5},
rights = {https://creativecommons.org/licenses/by-nc/4.0},
issn = {2706-9184, 2706-9176},
url = {https://fepbl.com/index.php/ijarss/article/view/725},
doi = {10.51594/ijarss.v5i10.725},
	shorttitle = {Review of {AI} in Education},
abstract = {This study analyses artificial intelligence ({AI}'s) impact on education in Africa, focusing on personalized learning, technology integration, and challenges in educational development. This review explores the transformative role of Artificial Intelligence ({AI}) in reshaping educational landscapes across Africa. As the continent strives for inclusive and quality education, {AI} emerges as a potent tool with the potential to address educational challenges, enhance learning outcomes, and bridge existing gaps. The review delves into various applications of {AI} in education, ranging from personalized learning experiences to adaptive assessment methodologies, and examines their impact on diverse learning environments. It gives an overview of the current state of education in Africa, the review highlights the disparities in access, quality, and infrastructure. It also investigates the innovative ways in which {AI} technologies are being integrated into educational systems. {AI}-powered adaptive learning platforms, virtual tutors, and intelligent content delivery systems are analyzed for their effectiveness in catering to the diverse needs of students across the continent. The review also addresses the potential of {AI} in overcoming language barriers, promoting literacy, and fostering digital skills development. Moreover, it explores the role of {AI} in facilitating teacher support, professional development, and administrative tasks, thereby contributing to the overall improvement of the education ecosystem. Ethical considerations, privacy concerns, and the digital divide are critically examined to ensure that the integration of {AI} in education aligns with ethical standards and promotes equitable access. Case studies and pilot projects from various African countries are presented to illustrate successful implementations, challenges faced, and lessons learned. Furthermore, the review discusses the importance of collaborative efforts involving governments, educational institutions, technology developers, and the private sector. Policy recommendations and strategic initiatives are explored to guide the responsible and sustainable integration of {AI} in education across the diverse socio-economic and cultural contexts prevalent in Africa. In conclusion, the review synthesizes the current state of {AI} in education in Africa, offering insights into its potential to revolutionize learning environments. The transformative power of {AI} in addressing educational challenges and fostering a culture of continuous improvement is underscored, paving the way for a more inclusive, accessible, and innovative education landscape in the African context.
},
	keywords = {Artificial Intelligence, Education, Transform Learning, Environments, Africa},
pages = {637--654},
number = {10},
journaltitle = {International Journal of Applied Research in Social Sciences},
shortjournal = {Int. j. appl. res. soc. sci.},
author = {{Onyebuchi Nneamaka Chisom} and {Chika Chioma Unachukwu} and {Blessing Osawaru}},
urldate = {2025-01-26},
date = {2024-01-15},
file = {Full text:files/724/Onyebuchi Nneamaka Chisom et al. - 2024 - REVIEW OF AI IN EDUCATION TRANSFORMING LEARNING ENVIRONMENTS IN AFRICA.pdf:application/pdf},
}
@misc{deepseek-ai_deepseek-v2_2024,
title = {{DeepSeek}-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/2405.04434},
doi = {10.48550/ARXIV.2405.04434},
shorttitle = {{DeepSeek}-V2},
abstract = {We present {DeepSeek}-V2, a strong Mixture-of-Experts ({MoE}) language model characterized by economical training and efficient inference. It comprises 236B total parameters, of which 21B are activated for each token, and supports a context length of 128K tokens. {DeepSeek}-V2 adopts innovative architectures including Multi-head Latent Attention ({MLA}) and {DeepSeekMoE}. {MLA} guarantees efficient inference through significantly compressing the Key-Value ({KV}) cache into a latent vector, while {DeepSeekMoE} enables training strong models at an economical cost through sparse computation. Compared with {DeepSeek} 67B, {DeepSeek}-V2 achieves significantly stronger performance, and meanwhile saves 42.5\% of training costs, reduces the {KV} cache by 93.3\%, and boosts the maximum generation throughput to 5.76 times. We pretrain {DeepSeek}-V2 on a high-quality and multi-source corpus consisting of 8.1T tokens, and further perform Supervised Fine-Tuning ({SFT}) and Reinforcement Learning ({RL}) to fully unlock its potential. Evaluation results show that, even with only 21B activated parameters, {DeepSeek}-V2 and its chat versions still achieve top-tier performance among open-source models.},
publisher = {{arXiv}},
author = {{DeepSeek-AI} and Liu, Aixin and Feng, Bei and Wang, Bin and Wang, Bingxuan and Liu, Bo and Zhao, Chenggang and Dengr, Chengqi and Ruan, Chong and Dai, Damai and Guo, Daya and Yang, Dejian and Chen, Deli and Ji, Dongjie and Li, Erhang and Lin, Fangyun and Luo, Fuli and Hao, Guangbo and Chen, Guanting and Li, Guowei and Zhang, H. and Xu, Hanwei and Yang, Hao and Zhang, Haowei and Ding, Honghui and Xin, Huajian and Gao, Huazuo and Li, Hui and Qu, Hui and Cai, J. L. and Liang, Jian and Guo, Jianzhong and Ni, Jiaqi and Li, Jiashi and Chen, Jin and Yuan, Jingyang and Qiu, Junjie and Song, Junxiao and Dong, Kai and Gao, Kaige and Guan, Kang and Wang, Lean and Zhang, Lecong and Xu, Lei and Xia, Leyi and Zhao, Liang and Zhang, Liyue and Li, Meng and Wang, Miaojun and Zhang, Mingchuan and Zhang, Minghua and Tang, Minghui and Li, Mingming and Tian, Ning and Huang, Panpan and Wang, Peiyi and Zhang, Peng and Zhu, Qihao and Chen, Qinyu and Du, Qiushi and Chen, R. J. and Jin, R. L. and Ge, Ruiqi and Pan, Ruizhe and Xu, Runxin and Chen, Ruyi and Li, S. S. and Lu, Shanghao and Zhou, Shangyan and Chen, Shanhuang and Wu, Shaoqing and Ye, Shengfeng and Ma, Shirong and Wang, Shiyu and Zhou, Shuang and Yu, Shuiping and Zhou, Shunfeng and Zheng, Size and Wang, T. and Pei, Tian and Yuan, Tian and Sun, Tianyu and Xiao, W. L. and Zeng, Wangding and An, Wei and Liu, Wen and Liang, Wenfeng and Gao, Wenjun and Zhang, Wentao and Li, X. Q. and Jin, Xiangyue and Wang, Xianzu and Bi, Xiao and Liu, Xiaodong and Wang, Xiaohan and Shen, Xiaojin and Chen, Xiaokang and Chen, Xiaosha and Nie, Xiaotao and Sun, Xiaowen and Wang, Xiaoxiang and Liu, Xin and Xie, Xin and Yu, Xingkai and Song, Xinnan and Zhou, Xinyi and Yang, Xinyu and Lu, Xuan and Su, Xuecheng and Wu, Y. and Li, Y. K. and Wei, Y. X. and Zhu, Y. X. and Xu, Yanhong and Huang, Yanping and Li, Yao and Zhao, Yao and Sun, Yaofeng and Li, Yaohui and Wang, Yaohui and Zheng, Yi and Zhang, Yichao and Xiong, Yiliang and Zhao, Yilong and He, Ying and Tang, Ying and Piao, Yishi and Dong, Yixin and Tan, Yixuan and Liu, Yiyuan and Wang, Yongji and Guo, Yongqiang and Zhu, Yuchen and Wang, Yuduan and Zou, Yuheng and Zha, Yukun and Ma, Yunxian and Yan, Yuting and You, Yuxiang and Liu, Yuxuan and Ren, Z. Z. and Ren, Zehui and Sha, Zhangli and Fu, Zhe and Huang, Zhen and Zhang, Zhen and Xie, Zhenda and Hao, Zhewen and Shao, Zhihong and Wen, Zhiniu and Xu, Zhipeng and Zhang, Zhongyu and Li, Zhuoshu and Wang, Zihan and Gu, Zihui and Li, Zilin and Xie, Ziwei},
urldate = {2025-01-26},
date = {2024},
note = {Version Number: 5},
keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences},
}
@article{chima_abimbola_edeni_role_2024,
title = {The role of {AI}-enhanced tools in overcoming socioeconomic barriers in education: A conceptual analysis},
volume = {21},
issn = {25819615},
url = {https://wjarr.com/content/role-ai-enhanced-tools-overcoming-socioeconomic-barriers-education-conceptual-analysis},
doi = {10.30574/wjarr.2024.21.3.0780},
shorttitle = {The role of {AI}-enhanced tools in overcoming socioeconomic barriers in education},
abstract = {This conceptual analysis explores the transformative potential of {AI}-enhanced tools in addressing socioeconomic barriers within the educational landscape. By leveraging artificial intelligence ({AI}) technologies, the paper aims to examine how such tools can mitigate disparities arising from economic, social, and cultural factors. Through a critical analysis, it seeks to elucidate the role of {AI} in promoting equitable access, enhancing learning outcomes, and fostering inclusivity in education. The executive summary encapsulates the essence of the conceptual analysis. It provides a concise overview of the paper's objectives, methodology, expected outcomes, and implications. In recent years, the intersection of artificial intelligence ({AI}) and education has garnered significant attention as a potential solution to address persistent socioeconomic barriers within the educational landscape. The executive summary outlines the imperative to explore how {AI}-enhanced tools can serve as transformative agents in mitigating disparities arising from economic, social, and cultural factors. By leveraging {AI} technologies, educators and policymakers have the opportunity to revolutionize traditional educational practices and foster more inclusive learning environments. The summary highlights the urgent need to examine the role of {AI} in promoting equitable access, enhancing learning outcomes, and fostering inclusivity across diverse socioeconomic backgrounds. Through a critical analysis of existing literature, case studies, and empirical research, the conceptual analysis seeks to elucidate the potential of {AI} to bridge the digital divide and advance educational equity. It emphasizes the importance of identifying actionable strategies and best practices for leveraging {AI} technology to address systemic inequalities in education.},
pages = {944--951},
number = {3},
journaltitle = {World Journal of Advanced Research and Reviews},
shortjournal = {World J. Adv. Res. Rev.},
author = {{Chima Abimbola Edeni} and {Olabisi Oluwakemi Adeleye} and {Idowu Sulaimon Adeniyi}},
urldate = {2025-01-26},
date = {2024-03-30},
file = {Full text:files/727/Chima Abimbola Edeni et al. - 2024 - The role of AI-enhanced tools in overcoming socioeconomic barriers in education A conceptual analys.pdf:application/pdf},
}
@article{li_ai_2023,
title = {{AI} in Education: Bridging the Divide or Widening the Gap? Exploring Equity, Opportunities, and Challenges in the Digital Age},
volume = {8},
issn = {2790-167X},
url = {https://madison-proceedings.com/index.php/aehssr/article/view/1924},
doi = {10.56028/aehssr.8.1.355.2023},
shorttitle = {{AI} in Education},
abstract = {Artificial Intelligence ({AI}) stands as a pivotal technological advancement with profound societal implications. This paper delves into a comprehensive analysis of diverse articles and perspectives to scrutinize {AI}'s influence on educational inequality, particularly within the context of the Chinese education system. While prevailing literature often skims the surface, there's a burgeoning sentiment celebrating the human-{AI} synergy, often overlooking its potential to accentuate educational disparities. This research delves deeper, uncovering the intricate nexus between {AI}-driven education and human capital markets. The findings suggest that {AI}, while promising, might inadvertently perpetuate the same crises across different demographics, amplifying existing inequalities. The strong may become stronger, while the vulnerable risk further marginalization, primarily due to disparities in resource allocation. To mitigate these challenges, this paper proposes three actionable recommendations. Furthermore, recognizing the global implications of this issue, the study advocates for international collaboration to ensure equitable access to {AI}-related educational resources, championing the cause of educational fairness worldwide.},
pages = {355},
number = {1},
journaltitle = {Advances in Education, Humanities and Social Science Research},
shortjournal = {{AEHSSR}},
author = {Li, Haomin},
urldate = {2025-01-26},
date = {2023-12-06},
file = {Full text:files/729/Li - 2023 - AI in Education Bridging the Divide or Widening the Gap Exploring Equity, Opportunities, and Chall.pdf:application/pdf},
}
@misc{qu_survey_2024,
title = {A Survey of Mamba},
rights = {Creative Commons Attribution Non Commercial Share Alike 4.0 International},
url = {https://arxiv.org/abs/2408.01129},
doi = {10.48550/ARXIV.2408.01129},
abstract = {As one of the most representative {DL} techniques, Transformer architecture has empowered numerous advanced models, especially the large language models ({LLMs}) that comprise billions of parameters, becoming a cornerstone in deep learning. Despite the impressive achievements, Transformers still face inherent limitations, particularly the time-consuming inference resulting from the quadratic computation complexity of attention calculation. Recently, a novel architecture named Mamba, drawing inspiration from classical state space models ({SSMs}), has emerged as a promising alternative for building foundation models, delivering comparable modeling abilities to Transformers while preserving near-linear scalability concerning sequence length. This has sparked an increasing number of studies actively exploring Mamba's potential to achieve impressive performance across diverse domains. Given such rapid evolution, there is a critical need for a systematic review that consolidates existing Mamba-empowered models, offering a comprehensive understanding of this emerging model architecture. In this survey, we therefore conduct an in-depth investigation of recent Mamba-associated studies, covering three main aspects: the advancements of Mamba-based models, the techniques of adapting Mamba to diverse data, and the applications where Mamba can excel. Specifically, we first review the foundational knowledge of various representative deep learning models and the details of Mamba-1\&2 as preliminaries. Then, to showcase the significance of Mamba for {AI}, we comprehensively review the related studies focusing on Mamba models' architecture design, data adaptability, and applications. Finally, we present a discussion of current limitations and explore various promising research directions to provide deeper insights for future investigations.},
publisher = {{arXiv}},
author = {Qu, Haohao and Ning, Liangbo and An, Rui and Fan, Wenqi and Derr, Tyler and Liu, Hui and Xu, Xin and Li, Qing},
urldate = {2025-01-26},
date = {2024},
note = {Version Number: 5},
keywords = {Artificial Intelligence (cs.{AI}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@article{wilson_gpu_2022,
	title = {{GPU} Prices and Cryptocurrency Returns},
volume = {11},
rights = {http://creativecommons.org/licenses/by-nc-nd/4.0},
issn = {2253-5802, 2253-5799},
url = {https://ojs.aut.ac.nz/applied-finance-letters/article/view/503},
doi = {10.24135/afl.v11i.503},
abstract = {We look at the association between the price of a cryptocurrency and the secondary market prices of the hardware used to mine it. We find the prices of the most efficient Graphical Processing Units ({GPUs}) for Ethereum mining are significantly positively correlated with the daily price returns to that cryptocurrency.},
pages = {2--8},
journaltitle = {Applied Finance Letters},
shortjournal = {{AFL}},
author = {Wilson, Linus},
urldate = {2025-01-26},
date = {2022-03-06},
file = {Full text:files/732/Wilson - 2022 - GPU PRICES AND CRYPTOCURRENCY RETURNS.pdf:application/pdf},
}
@misc{xiao_large_2024,
title = {Large Language Model Performance Benchmarking on Mobile Platforms: A Thorough Evaluation},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/2410.03613},
doi = {10.48550/ARXIV.2410.03613},
shorttitle = {Large Language Model Performance Benchmarking on Mobile Platforms},
abstract = {As large language models ({LLMs}) increasingly integrate into every aspect of our work and daily lives, there are growing concerns about user privacy, which push the trend toward local deployment of these models. There are a number of lightweight {LLMs} (e.g., Gemini Nano, {LLAMA}2 7B) that can run locally on smartphones, providing users with greater control over their personal data. As a rapidly emerging application, we are concerned about their performance on commercial-off-the-shelf mobile devices. To fully understand the current landscape of {LLM} deployment on mobile platforms, we conduct a comprehensive measurement study on mobile devices. We evaluate both metrics that affect user experience, including token throughput, latency, and battery consumption, as well as factors critical to developers, such as resource utilization, {DVFS} strategies, and inference engines. In addition, we provide a detailed analysis of how these hardware capabilities and system dynamics affect on-device {LLM} performance, which may help developers identify and address bottlenecks for mobile {LLM} applications. We also provide comprehensive comparisons across the mobile system-on-chips ({SoCs}) from major vendors, highlighting their performance differences in handling {LLM} workloads. We hope that this study can provide insights for both the development of on-device {LLMs} and the design for future mobile system architecture.},
publisher = {{arXiv}},
author = {Xiao, Jie and Huang, Qianyi and Chen, Xu and Tian, Chen},
urldate = {2025-01-26},
date = {2024},
note = {Version Number: 1},
keywords = {{FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@misc{yong_low-resource_2023,
title = {Low-Resource Languages Jailbreak {GPT}-4},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/2310.02446},
doi = {10.48550/ARXIV.2310.02446},
abstract = {{AI} safety training and red-teaming of large language models ({LLMs}) are measures to mitigate the generation of unsafe content. Our work exposes the inherent cross-lingual vulnerability of these safety mechanisms, resulting from the linguistic inequality of safety training data, by successfully circumventing {GPT}-4's safeguard through translating unsafe English inputs into low-resource languages. On the {AdvBenchmark}, {GPT}-4 engages with the unsafe translated inputs and provides actionable items that can get the users towards their harmful goals 79\% of the time, which is on par with or even surpassing state-of-the-art jailbreaking attacks. Other high-/mid-resource languages have significantly lower attack success rate, which suggests that the cross-lingual vulnerability mainly applies to low-resource languages. Previously, limited training on low-resource languages primarily affects speakers of those languages, causing technological disparities. However, our work highlights a crucial shift: this deficiency now poses a risk to all {LLMs} users. Publicly available translation {APIs} enable anyone to exploit {LLMs}' safety vulnerabilities. Therefore, our work calls for a more holistic red-teaming efforts to develop robust multilingual safeguards with wide language coverage.},
publisher = {{arXiv}},
author = {Yong, Zheng-Xin and Menghini, Cristina and Bach, Stephen H.},
urldate = {2025-01-26},
date = {2023},
note = {Version Number: 2},
keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), Cryptography and Security (cs.{CR}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@misc{zhong_opportunities_2024,
title = {Opportunities and Challenges of Large Language Models for Low-Resource Languages in Humanities Research},
rights = {Creative Commons Attribution Non Commercial No Derivatives 4.0 International},
url = {https://arxiv.org/abs/2412.04497},
doi = {10.48550/ARXIV.2412.04497},
abstract = {Low-resource languages serve as invaluable repositories of human history, embodying cultural evolution and intellectual diversity. Despite their significance, these languages face critical challenges, including data scarcity and technological limitations, which hinder their comprehensive study and preservation. Recent advancements in large language models ({LLMs}) offer transformative opportunities for addressing these challenges, enabling innovative methodologies in linguistic, historical, and cultural research. This study systematically evaluates the applications of {LLMs} in low-resource language research, encompassing linguistic variation, historical documentation, cultural expressions, and literary analysis. By analyzing technical frameworks, current methodologies, and ethical considerations, this paper identifies key challenges such as data accessibility, model adaptability, and cultural sensitivity. Given the cultural, historical, and linguistic richness inherent in low-resource languages, this work emphasizes interdisciplinary collaboration and the development of customized models as promising avenues for advancing research in this domain. By underscoring the potential of integrating artificial intelligence with the humanities to preserve and study humanity's linguistic and cultural heritage, this study fosters global efforts towards safeguarding intellectual diversity.},
publisher = {{arXiv}},
author = {Zhong, Tianyang and Yang, Zhenyuan and Liu, Zhengliang and Zhang, Ruidong and Liu, Yiheng and Sun, Haiyang and Pan, Yi and Li, Yiwei and Zhou, Yifan and Jiang, Hanqi and Chen, Junhao and Liu, Tianming},
urldate = {2025-01-26},
date = {2024},
note = {Version Number: 2},
keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences},
}
@article{walter_embracing_2024,
title = {Embracing the future of Artificial Intelligence in the classroom: the relevance of {AI} literacy, prompt engineering, and critical thinking in modern education},
volume = {21},
issn = {2365-9440},
url = {https://educationaltechnologyjournal.springeropen.com/articles/10.1186/s41239-024-00448-3},
doi = {10.1186/s41239-024-00448-3},
shorttitle = {Embracing the future of Artificial Intelligence in the classroom},
	abstract = {The present discussion examines the transformative impact of Artificial Intelligence ({AI}) in educational settings, focusing on the necessity for {AI} literacy, prompt engineering proficiency, and enhanced critical thinking skills. The introduction of {AI} into education marks a significant departure from conventional teaching methods, offering personalized learning and support for diverse educational requirements, including students with special needs. However, this integration presents challenges, including the need for comprehensive educator training and curriculum adaptation to align with societal structures. {AI} literacy is identified as crucial, encompassing an understanding of {AI} technologies and their broader societal impacts. Prompt engineering is highlighted as a key skill for eliciting specific responses from {AI} systems, thereby enriching educational experiences and promoting critical thinking. There is detailed analysis of strategies for embedding these skills within educational curricula and pedagogical practices. This is discussed through a case-study based on a Swiss university and a narrative literature review, followed by practical suggestions of how to implement {AI} in the classroom.},
pages = {15},
number = {1},
journaltitle = {International Journal of Educational Technology in Higher Education},
shortjournal = {Int J Educ Technol High Educ},
author = {Walter, Yoshija},
urldate = {2025-01-26},
date = {2024-02-26},
langid = {english},
file = {Full text:files/742/Walter - 2024 - Embracing the future of Artificial Intelligence in the classroom the relevance of AI literacy, prom.pdf:application/pdf},
}
@article{bauchner_use_2024,
title = {Use of artificial intelligence and the future of peer review},
volume = {2},
rights = {https://creativecommons.org/licenses/by-nc/4.0/},
issn = {2976-5390},
url = {https://academic.oup.com/healthaffairsscholar/article/doi/10.1093/haschl/qxae058/7663651},
doi = {10.1093/haschl/qxae058},
	abstract = {Conducting high-quality peer review of scientific manuscripts has become increasingly challenging. The substantial increase in the number of manuscripts, lack of a sufficient number of peer-reviewers, and questions related to effectiveness, fairness, and efficiency, require a different approach. Large-language models, 1 form of artificial intelligence ({AI}), have emerged as a new approach to help resolve many of the issues facing contemporary medicine and science. We believe {AI} should be used to assist in the triaging of manuscripts submitted for peer-review publication.},
pages = {qxae058},
number = {5},
journaltitle = {Health Affairs Scholar},
author = {Bauchner, Howard and Rivara, Frederick P},
urldate = {2025-01-26},
date = {2024-05-03},
langid = {english},
file = {Full text:files/745/Bauchner e Rivara - 2024 - Use of artificial intelligence and the future of peer review.pdf:application/pdf},
}
@article{noauthor_reviewing_2024,
title = {Reviewing the performance of {AI} detection tools in differentiating between {AI}-generated and human-written texts: A literature and integrative hybrid review},
volume = {7},
issn = {2591-801X, 2591-801X},
url = {https://journals.sfu.ca/jalt/index.php/jalt/article/view/1369},
doi = {10.37074/jalt.2024.7.1.14},
shorttitle = {Reviewing the performance of {AI} detection tools in differentiating between {AI}-generated and human-written texts},
number = {1},
journaltitle = {Journal of Applied Learning \& Teaching},
shortjournal = {{JALT}},
urldate = {2025-01-26},
date = {2024-02-07},
langid = {english},
file = {Full text:files/747/2024 - Reviewing the performance of AI detection tools in differentiating between AI-generated and human-wr.pdf:application/pdf},
}
@misc{kirchenbauer_watermark_2023,
title = {A Watermark for Large Language Models},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/2301.10226},
doi = {10.48550/ARXIV.2301.10226},
abstract = {Potential harms of large language models can be mitigated by watermarking model output, i.e., embedding signals into generated text that are invisible to humans but algorithmically detectable from a short span of tokens. We propose a watermarking framework for proprietary language models. The watermark can be embedded with negligible impact on text quality, and can be detected using an efficient open-source algorithm without access to the language model {API} or parameters. The watermark works by selecting a randomized set of "green" tokens before a word is generated, and then softly promoting use of green tokens during sampling. We propose a statistical test for detecting the watermark with interpretable p-values, and derive an information-theoretic framework for analyzing the sensitivity of the watermark. We test the watermark using a multi-billion parameter model from the Open Pretrained Transformer ({OPT}) family, and discuss robustness and security.},
publisher = {{arXiv}},
author = {Kirchenbauer, John and Geiping, Jonas and Wen, Yuxin and Katz, Jonathan and Miers, Ian and Goldstein, Tom},
urldate = {2025-01-26},
date = {2023},
note = {Version Number: 4},
keywords = {Computation and Language (cs.{CL}), Cryptography and Security (cs.{CR}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@article{liang_gpt_2023,
title = {{GPT} detectors are biased against non-native English writers},
volume = {4},
issn = {26663899},
url = {https://linkinghub.elsevier.com/retrieve/pii/S2666389923001307},
doi = {10.1016/j.patter.2023.100779},
pages = {100779},
number = {7},
journaltitle = {Patterns},
shortjournal = {Patterns},
author = {Liang, Weixin and Yuksekgonul, Mert and Mao, Yining and Wu, Eric and Zou, James},
urldate = {2025-01-26},
date = {2023-07},
langid = {english},
file = {PubMed Central Full Text PDF:files/768/Liang et al. - 2023 - GPT detectors are biased against non-native English writers.pdf:application/pdf},
}
@misc{lu_ai_2024,
title = {The {AI} Scientist: Towards Fully Automated Open-Ended Scientific Discovery},
rights = {Creative Commons Attribution 4.0 International},
url = {https://arxiv.org/abs/2408.06292},
doi = {10.48550/ARXIV.2408.06292},
shorttitle = {The {AI} Scientist},
abstract = {One of the grand challenges of artificial general intelligence is developing agents capable of conducting scientific research and discovering new knowledge. While frontier models have already been used as aides to human scientists, e.g. for brainstorming ideas, writing code, or prediction tasks, they still conduct only a small part of the scientific process. This paper presents the first comprehensive framework for fully automatic scientific discovery, enabling frontier large language models to perform research independently and communicate their findings. We introduce The {AI} Scientist, which generates novel research ideas, writes code, executes experiments, visualizes results, describes its findings by writing a full scientific paper, and then runs a simulated review process for evaluation. In principle, this process can be repeated to iteratively develop ideas in an open-ended fashion, acting like the human scientific community. We demonstrate its versatility by applying it to three distinct subfields of machine learning: diffusion modeling, transformer-based language modeling, and learning dynamics. Each idea is implemented and developed into a full paper at a cost of less than \$15 per paper. To evaluate the generated papers, we design and validate an automated reviewer, which we show achieves near-human performance in evaluating paper scores. The {AI} Scientist can produce papers that exceed the acceptance threshold at a top machine learning conference as judged by our automated reviewer. This approach signifies the beginning of a new era in scientific discovery in machine learning: bringing the transformative benefits of {AI} agents to the entire research process of {AI} itself, and taking us closer to a world where endless affordable creativity and innovation can be unleashed on the world's most challenging problems. Our code is open-sourced at https://github.com/{SakanaAI}/{AI}-Scientist},
publisher = {{arXiv}},
author = {Lu, Chris and Lu, Cong and Lange, Robert Tjarko and Foerster, Jakob and Clune, Jeff and Ha, David},
urldate = {2025-01-26},
date = {2024},
note = {Version Number: 3},
keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@misc{sadasivan_can_2023,
title = {Can {AI}-Generated Text be Reliably Detected?},
rights = {Creative Commons Attribution 4.0 International},
url = {https://arxiv.org/abs/2303.11156},
doi = {10.48550/ARXIV.2303.11156},
abstract = {Large Language Models ({LLMs}) perform impressively well in various applications. However, the potential for misuse of these models in activities such as plagiarism, generating fake news, and spamming has raised concern about their responsible use. Consequently, the reliable detection of {AI}-generated text has become a critical area of research. {AI} text detectors have shown to be effective under their specific settings. In this paper, we stress-test the robustness of these {AI} text detectors in the presence of an attacker. We introduce recursive paraphrasing attack to stress test a wide range of detection schemes, including the ones using the watermarking as well as neural network-based detectors, zero shot classifiers, and retrieval-based detectors. Our experiments conducted on passages, each approximately 300 tokens long, reveal the varying sensitivities of these detectors to our attacks. Our findings indicate that while our recursive paraphrasing method can significantly reduce detection rates, it only slightly degrades text quality in many cases, highlighting potential vulnerabilities in current detection systems in the presence of an attacker. Additionally, we investigate the susceptibility of watermarked {LLMs} to spoofing attacks aimed at misclassifying human-written text as {AI}-generated. We demonstrate that an attacker can infer hidden {AI} text signatures without white-box access to the detection method, potentially leading to reputational risks for {LLM} developers. Finally, we provide a theoretical framework connecting the {AUROC} of the best possible detector to the Total Variation distance between human and {AI} text distributions. This analysis offers insights into the fundamental challenges of reliable detection as language models continue to advance. Our code is publicly available at https://github.com/vinusankars/Reliability-of-{AI}-text-detectors.},
publisher = {{arXiv}},
author = {Sadasivan, Vinu Sankar and Kumar, Aounon and Balasubramanian, Sriram and Wang, Wenxiao and Feizi, Soheil},
urldate = {2025-01-26},
date = {2023},
note = {Version Number: 4},
keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}
@misc{yakura_empirical_2024,
title = {Empirical evidence of Large Language Model's influence on human spoken communication},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/2409.01754},
doi = {10.48550/ARXIV.2409.01754},
abstract = {Artificial Intelligence ({AI}) agents now interact with billions of humans in natural language, thanks to advances in Large Language Models ({LLMs}) like {ChatGPT}. This raises the question of whether {AI} has the potential to shape a fundamental aspect of human culture: the way we speak. Recent analyses revealed that scientific publications already exhibit evidence of {AI}-specific language. But this evidence is inconclusive, since scientists may simply be using {AI} to copy-edit their writing. To explore whether {AI} has influenced human spoken communication, we transcribed and analyzed about 280,000 English-language videos of presentations, talks, and speeches from more than 20,000 {YouTube} channels of academic institutions. We find a significant shift in the trend of word usage specific to words distinctively associated with {ChatGPT} following its release. These findings provide the first empirical evidence that humans increasingly imitate {LLMs} in their spoken language. Our results raise societal and policy-relevant concerns about the potential of {AI} to unintentionally reduce linguistic diversity, or to be deliberately misused for mass manipulation. They also highlight the need for further investigation into the feedback loops between machine behavior and human culture.},
publisher = {{arXiv}},
author = {Yakura, Hiromu and Lopez-Lopez, Ezequiel and Brinkmann, Levin and Serna, Ignacio and Gupta, Prateek and Rahwan, Iyad},
urldate = {2025-01-26},
date = {2024},
note = {Version Number: 1},
keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), Computers and Society (cs.{CY}), {FOS}: Computer and information sciences, Human-Computer Interaction (cs.{HC})},
}
@article{hopfenbeck_challenges_2023,
title = {Challenges and opportunities for classroom-based formative assessment and {AI}: a perspective article},
volume = {8},
issn = {2504-284X},
url = {https://www.frontiersin.org/articles/10.3389/feduc.2023.1270700/full},
doi = {10.3389/feduc.2023.1270700},
shorttitle = {Challenges and opportunities for classroom-based formative assessment and {AI}},
	abstract = {The integration of artificial intelligence ({AI}) into educational contexts may give rise to both positive and negative ramifications for teachers' uses of formative assessment within their classrooms. Drawing on our diverse experiences as academics, researchers, psychometricians, teachers, and teacher educators specializing in formative assessment, we examine the pedagogical practices in which teachers provide feedback, facilitate peer- and self-assessments, and support students' learning, and discuss how existing challenges to each of these may be affected by applications of {AI}. Firstly, we overview the challenges in the practice of formative assessment independently of the influence of {AI}. Moreover, based on the authors' varied experience in formative assessment, we discuss the opportunities that {AI} brings to address the challenges in formative assessment as well as the new challenges introduced by the application of {AI} in formative assessment. Finally, we argue for the ongoing importance of self-regulated learning and a renewed emphasis on critical thinking for more effective implementation of formative assessment in this new {AI}-driven digital age.},
pages = {1270700},
journaltitle = {Frontiers in Education},
shortjournal = {Front. Educ.},
author = {Hopfenbeck, Therese N. and Zhang, Zhonghua and Sun, Sundance Zhihong and Robertson, Pam and {McGrane}, Joshua A.},
urldate = {2025-01-26},
date = {2023-11-23},
file = {Full text:files/779/Hopfenbeck et al. - 2023 - Challenges and opportunities for classroom-based formative assessment and AI a perspective article.pdf:application/pdf},
}
@article{nicol_power_2021,
title = {The power of internal feedback: exploiting natural comparison processes},
volume = {46},
issn = {0260-2938, 1469-297X},
url = {https://www.tandfonline.com/doi/full/10.1080/02602938.2020.1823314},
doi = {10.1080/02602938.2020.1823314},
shorttitle = {The power of internal feedback},
pages = {756--778},
number = {5},
journaltitle = {Assessment \& Evaluation in Higher Education},
shortjournal = {Assessment \& Evaluation in Higher Education},
author = {Nicol, David},
urldate = {2025-01-26},
date = {2021-07-04},
langid = {english},
file = {Full text:files/784/Nicol - 2021 - The power of internal feedback exploiting natural comparison processes.pdf:application/pdf},
}
@article{nicol_making_2022,
title = {Making internal feedback explicit: harnessing the comparisons students make during two-stage exams},
volume = {47},
issn = {0260-2938, 1469-297X},
url = {https://www.tandfonline.com/doi/full/10.1080/02602938.2021.1934653},
doi = {10.1080/02602938.2021.1934653},
shorttitle = {Making internal feedback explicit},
pages = {507--522},
number = {4},
journaltitle = {Assessment \& Evaluation in Higher Education},
shortjournal = {Assessment \& Evaluation in Higher Education},
author = {Nicol, David and Selvaretnam, Geethanjali},
urldate = {2025-01-26},
date = {2022-05-19},
langid = {english},
file = {Versione accettata:files/793/Nicol e Selvaretnam - 2022 - Making internal feedback explicit harnessing the comparisons students make during two-stage exams.pdf:application/pdf},
}
@article{r_ai-driven_2024,
title = {{AI}-Driven Flipped Classroom: Revolutionizing Education Through Digital Pedagogy},
volume = {7},
rights = {https://creativecommons.org/licenses/by-nc-nd/4.0},
issn = {2682-6704},
url = {https://abjournals.org/bjeldp/papers/volume-7/issue-2/ai-driven-flipped-classroom-revolutionizing-education-through-digital-pedagogy/},
doi = {10.52589/BJELDP-LTDJFLIH},
shorttitle = {{AI}-Driven Flipped Classroom},
abstract = {The integration of artificial intelligence ({AI}) into the flipped classroom model is the subject of this research paper. With the flipped classroom approach, traditional teaching methods are reversed, with instructional content being delivered outside of class and class time being devoted to discussions, activities, and problem-solving. Teachers want to give students a personalized learning experience, and they do this by implementing {AI} technologies like intelligent tutoring systems, virtual tutors, and adaptive learning platforms. This study uses existing research and empirical studies to analyse the effects, advantages, difficulties, and efficacy of using {AI} in flipped classrooms. The study explores the use of {AI} in flipped classrooms, highlighting its potential benefits like improved learning outcomes and scalability. However, it also addresses challenges like technology infrastructure, teacher preparation, privacy, and equity, as well as potential drawbacks.},
pages = {169--179},
number = {2},
journaltitle = {British Journal of Education, Learning and Development Psychology},
shortjournal = {British Journal of Education, Learning and Development Psychology},
author = {R., Suvendu and P. S., Deb},
urldate = {2025-01-26},
date = {2024-06-24},
	langid = {english},
file = {Full text:files/796/R. e P. S. - 2024 - AI-Driven Flipped Classroom Revolutionizing Education Through Digital Pedagogy.pdf:application/pdf},
}
@article{nurjanah_artificial_2024,
	title = {Artificial Intelligence ({AI}) Usage In Today's Teaching And Learning Process: A Review},
volume = {6},
rights = {http://creativecommons.org/licenses/by-sa/4.0},
issn = {2684-883X, 2684-6853},
url = {https://jurnal.syntax-idea.co.id/index.php/syntax-idea/article/view/3126},
doi = {10.46799/syntax-idea.v6i3.3126},
	shorttitle = {Artificial Intelligence ({AI}) Usage In Today's Teaching And Learning Process},
abstract = {In today's technology world, the integration of artificial intelligence ({AI}) has become increasingly prominent in education, with enormous potential to improve the teaching and learning experience. {AI}, defined by its ability to imitate human intelligence, possesses enormous power and has the potential to dramatically impact a variety of areas, most notably education. {AI} has significantly improved learning experiences for both teachers and students by allowing them to be customized and personalized. This review article investigates the prospects provided by {AI} in modern teaching and learning processes, with a special emphasis on its advantages in language learning. This study examines existing literature and studies on {AI} in education, with a focus on language learning environments. The results show {AI}'s advantages in giving targeted feedback and practice opportunities, making language learning easier, and improving overall learning efficiency and effectiveness. Thus, this review contributes to a better understanding of {AI}'s role in redefining present educational paradigms, as well as its potential to transform teaching and learning methodologies.},
pages = {1517--1523},
number = {3},
journaltitle = {Syntax Idea},
shortjournal = {{SLJIL}},
author = {Nurjanah, Aisyah and Salsabila, Irma Nuraeni and Azzahra, Adelia and Rahayu, Riska and Marlina, Nina},
urldate = {2025-01-26},
date = {2024-04-05},
file = {Full text:files/800/Nurjanah et al. - 2024 - Artificial Intelligence (AI) Usage In Todays Teaching And Learning Process A Review.pdf:application/pdf},
}
@article{chen_artificial_2020,
title = {Artificial Intelligence in Education: A Review},
volume = {8},
rights = {https://creativecommons.org/licenses/by/4.0/legalcode},
issn = {2169-3536},
url = {https://ieeexplore.ieee.org/document/9069875/},
doi = {10.1109/ACCESS.2020.2988510},
shorttitle = {Artificial Intelligence in Education},
pages = {75264--75278},
journaltitle = {{IEEE} Access},
shortjournal = {{IEEE} Access},
author = {Chen, Lijia and Chen, Pingping and Lin, Zhijian},
urldate = {2025-01-26},
date = {2020},
}
@article{cho_student_2010,
title = {Student revision with peer and expert reviewing},
volume = {20},
rights = {https://www.elsevier.com/tdm/userlicense/1.0/},
issn = {09594752},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0959475209000747},
doi = {10.1016/j.learninstruc.2009.08.006},
pages = {328--338},
number = {4},
journaltitle = {Learning and Instruction},
shortjournal = {Learning and Instruction},
author = {Cho, Kwangsu and {MacArthur}, Charles},
urldate = {2025-01-26},
date = {2010-08},
langid = {english},
}
@article{redazione_quale_2006,
title = {Quale Storia? Intervista ad Antonio Brusa a proposito di didattica, insegnamento e ricerca storiografica},
issn = {1825-411X},
url = {https://doi.org/10.1473/stor327},
doi = {10.1473/stor327},
shorttitle = {Quale Storia?},
journaltitle = {Storicamente},
	author = {{Redazione}},
urldate = {2025-01-26},
date = {2006},
}