{
  "@context": "https://schema.org",
  "@type": "TechArticle",
  "@id": "https://anchorfact.org/kb/kb-2026-00007",
  "headline": "Reinforcement Learning from Human Feedback (RLHF)",
  "description": "RLHF (Reinforcement Learning from Human Feedback) is a three-stage technique for aligning AI models with human preferences. First demonstrated on Atari games and simulated robotics (Christiano et al., 2017), it became the industry standard with OpenAI's InstructGPT (Ouyang et al., 2022, NeurIPS), which showed that a 1.3B parameter model fine-tuned with RLHF was preferred by human raters over the 175B GPT-3 base model. RLHF has since been adopted by all major LLM providers and is the alignment method behind ChatGPT, Claude, Gemini, and Grok. As of May 2026, the InstructGPT paper has been cited over 3,000 times.",
  "dateCreated": "2026-05-22T14:59:47.503Z",
  "dateModified": "2026-05-22T14:59:47.503Z",
  "author": {
    "@type": "Organization",
    "name": "AnchorFact"
  },
  "publisher": {
    "@type": "Organization",
    "name": "AnchorFact",
    "url": "https://anchorfact.org"
  },
  "license": "https://creativecommons.org/licenses/by/4.0/",
  "anchorfact:confidence": "high",
  "anchorfact:generationMethod": "human_only",
  "citation": []
}