@prefix schema: <https://schema.org/> .
@prefix af: <https://anchorfact.org/ns#> .

# Core record for this knowledge-base entry: its type, its public title and
# page URL, and the af: publication/confidence annotations.
<https://anchorfact.org/kb/ai/ai-benchmarks-and-evaluation> a schema:TechArticle ;

  # --- schema.org description ---
  schema:url <https://anchorfact.org/ai/ai-benchmarks-and-evaluation/> ;
  schema:headline "AI Benchmarks: MMLU, SWE-bench, and How We Measure Intelligence" ;

  # --- af: publication state and how the entry was produced ---
  af:generationMethod "ai_structured" ;
  af:status "public" ;

  # --- af: confidence annotations ---
  # NOTE(review): the score is a plain string literal, not a typed numeric
  # (e.g. xsd:decimal) — confirm that consumers expect a string here.
  af:confidenceBasis "verified_sources" ;
  af:confidenceScore "0.82" ;
  af:confidence "medium" .

# Works cited by the entry, written as a single object list on one subject.
<https://anchorfact.org/kb/ai/ai-benchmarks-and-evaluation>
  schema:citation
    <https://arxiv.org/abs/2009.03300> ,
    <https://arxiv.org/abs/2211.09110> ,
    <https://arxiv.org/abs/2206.04615> ,
    <https://arxiv.org/abs/2310.06770> ;

  # NOTE(review): af:sourceTier is asserted on the article itself, not on any
  # individual citation. An RDF graph is a set of triples, so stating it once
  # yields the same graph as repeating it beside every citation. If a tier per
  # cited source was intended, that needs a different model — confirm.
  af:sourceTier "A" .