discourse/spec/fixtures/onebox/assemblyai.response

<!DOCTYPE html>
<html lang="en" class="theme-light">
<head>
<title>How RLHF Works (And How Things May Go Wrong)</title>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="HandheldFriendly" content="True" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="description" content="How are Large Language Models (LLMs) like ChatGPT trained with Reinforcement Learning From Human Feedback (RLHF) to learn human preferences?">
<link rel="icon" href="https://www.assemblyai.com/blog/content/images/size/w256h256/2021/09/Frame-141-2.png" type="image/png">
<link rel="canonical" href="https://www.assemblyai.com/blog/how-rlhf-preference-model-tuning-works-and-how-things-may-go-wrong/">
<meta name="referrer" content="no-referrer-when-downgrade">
<meta property="og:site_name" content="News, Tutorials, AI Research">
<meta property="og:type" content="article">
<meta property="og:title" content="How RLHF Works (And How Things May Go Wrong)">
<meta property="og:description" content="Large Language Models like ChatGPT are trained with Reinforcement Learning From Human Feedback (RLHF) to learn human preferences. Lets uncover how RLHF works and survey its current strongest limitations.">
<meta property="og:url" content="https://www.assemblyai.com/blog/how-rlhf-preference-model-tuning-works-and-how-things-may-go-wrong/">
<meta property="og:image" content="https://www.assemblyai.com/blog/content/images/2023/08/Blog---RLHF-models.png">
<meta property="article:published_time" content="2023-08-03T14:38:05.000Z">
<meta property="article:modified_time" content="2023-08-07T11:17:39.000Z">
<meta property="article:tag" content="Deep Learning">
<meta property="article:tag" content="Popular">
<meta property="article:tag" content="no-chatbot">
<meta property="article:publisher" content="https://www.facebook.com/AssemblyAI">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="How RLHF Works (And How Things May Go Wrong)">
<meta name="twitter:description" content="Large Language Models like ChatGPT are trained with Reinforcement Learning From Human Feedback (RLHF) to learn human preferences. Lets uncover how RLHF works and survey its current strongest limitations.">
<meta name="twitter:url" content="https://www.assemblyai.com/blog/how-rlhf-preference-model-tuning-works-and-how-things-may-go-wrong/">
<meta name="twitter:image" content="https://www.assemblyai.com/blog/content/images/2023/08/Blog---RLHF-models.png">
<meta name="twitter:label1" content="Written by">
<meta name="twitter:data1" content="Marco Ramponi">
<meta name="twitter:label2" content="Filed under">
<meta name="twitter:data2" content="Deep Learning, Popular, no-chatbot">
<meta name="twitter:site" content="@AssemblyAI">
<meta property="og:image:width" content="1600">
<meta property="og:image:height" content="900">
<script type="application/ld+json">
{
    "@context": "https://schema.org",
    "@type": "Article",
    "publisher": {
        "@type": "Organization",
        "name": "News, Tutorials, AI Research",
        "url": "https://www.assemblyai.com/blog/",
        "logo": {
            "@type": "ImageObject",
            "url": "https://www.assemblyai.com/blog/content/images/size/w256h256/2021/09/Frame-141-2.png",
            "width": 60,
            "height": 60
        }
    },
    "author": {
        "@type": "Person",
        "name": "Marco Ramponi",
        "image": {
            "@type": "ImageObject",
            "url": "https://www.assemblyai.com/blog/content/images/2022/11/marco-foto-profile_cut.jpg",
            "width": 1236,
            "height": 1182
        },
        "url": "https://www.assemblyai.com/blog/author/marco/",
        "sameAs": [
            "https://www.linkedin.com/in/marco-ramponi-ai"
        ]
    },
    "headline": "How RLHF Works (And How Things May Go Wrong)",
    "url": "https://www.assemblyai.com/blog/how-rlhf-preference-model-tuning-works-and-how-things-may-go-wrong/",
    "datePublished": "2023-08-03T14:38:05.000Z",
    "dateModified": "2023-08-07T11:17:39.000Z",
    "image": {
        "@type": "ImageObject",
        "url": "https://www.assemblyai.com/blog/content/images/2023/08/Blog---RLHF-models.png",
        "width": 1600,
        "height": 900
    },
    "keywords": "Deep Learning, Popular, no-chatbot",
    "description": "Large Language Models like ChatGPT are trained with Reinforcement Learning From Human Feedback (RLHF) to learn human preferences. Let's uncover how RLHF works and survey its current strongest limitations.",
    "mainEntityOfPage": "https://www.assemblyai.com/blog/how-rlhf-preference-model-tuning-works-and-how-things-may-go-wrong/"
}
</script>
<meta name="generator" content="Ghost 5.58">
</head>
<body class="post-template tag-deep-learning tag-popular tag-no-chatbot"></body>
</html>