{
  "@context": "https://schema.org",
  "@type": "Article",
  "@id": "https://anchorfact.org/kb/scene-text-recognition",
  "headline": "Scene Text Recognition: Transformer-Based OCR and End-to-End Text Spotting",
  "description": "Scene text recognition reads text in the wild -- street signs, storefronts, license plates, and handwritten notes captured by smartphone cameras. Transformer-based architectures have moved OCR from fragile multi-stage pipelines to robust end-to-end models that handle curved text, diverse fonts, and challenging lighting conditions.",
  "dateCreated": "2026-05-24T02:49:13.659Z",
  "dateModified": "2026-05-24",
  "author": {
    "@type": "Organization",
    "name": "AnchorFact"
  },
  "publisher": {
    "@type": "Organization",
    "name": "AnchorFact",
    "url": "https://anchorfact.org"
  },
  "license": "https://creativecommons.org/licenses/by/4.0/",
  "anchorfact:confidence": "high",
  "anchorfact:generationMethod": "ai_assisted",
  "citation": [
    {
      "@type": "CreativeWork",
      "name": "TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models",
      "sameAs": "https://arxiv.org/abs/2109.10282"
    },
    {
      "@type": "CreativeWork",
      "name": "A Two-Stage End-to-End Framework for Robust Scene Text Recognition",
      "sameAs": "https://www.mdpi.com/2079-9292/14/23/4594"
    }
  ]
}