import tiktoken
def tiktoken_evaluation(ground_truth_refs, retrieved_text):
    """Score a retrieved text against reference texts by token-set overlap.

    The retrieved text and every reference are encoded with the
    ``cl100k_base`` tiktoken encoding and reduced to *sets* of token ids, so
    a token that occurs several times is counted once and token order is
    ignored.

    For each reference:
        recall    = |retrieved ∩ reference| / |reference|
        precision = |retrieved ∩ reference| / |retrieved|

    Args:
        ground_truth_refs: Iterable of reference strings.
        retrieved_text: The retrieved string to evaluate.

    Returns:
        A ``(max_recall, max_precision)`` tuple holding the best recall and
        the best precision seen over all references. The two maxima are
        tracked independently, so they may come from different references.
        Both are 0 when ``ground_truth_refs`` is empty or nothing overlaps.
    """
    encoder = tiktoken.get_encoding("cl100k_base")
    retrieved_tokens = set(encoder.encode(retrieved_text))

    max_recall = 0
    max_precision = 0
    for ref in ground_truth_refs:
        ref_tokens = set(encoder.encode(ref))
        overlap = len(retrieved_tokens & ref_tokens)

        # A text that encodes to no tokens (e.g. an empty string) would make
        # the denominator zero; score it as 0.0 instead of raising
        # ZeroDivisionError.
        recall = overlap / len(ref_tokens) if ref_tokens else 0.0
        precision = overlap / len(retrieved_tokens) if retrieved_tokens else 0.0

        max_recall = max(max_recall, recall)
        max_precision = max(max_precision, precision)

    return max_recall, max_precision
# Evaluate the first retrieved text of the rainbow example against all of
# its ground-truth references.
retrieved = rainbow_example["retrieved_texts"][0]
recall, precision = tiktoken_evaluation(
    rainbow_example["ground_truth_refs"],
    retrieved,
)

# Worked example for the first retrieved text.
#
# NOTE: the sub-word split below is a simplified illustration of how the
# overlap is computed, not actual cl100k_base output; the real encoder
# yields integer token ids and its token boundaries will likely differ, so
# the values returned above need not match these numbers.
#
# Retrieved text:
#   "Sunlight bending through rain droplets creates rainbows with colorful bands."
#   -> [sun, light, bend, ing, through, rain, drop, lets, creates, rainbow,
#       s, with, color, ful, bands]                      (15 unique tokens)
#
# Reference text 1:
#   "Rainbows occur when sunlight is refracted through raindrops, splitting
#    light into colors."
#   -> [Rainbow, s, occur, when, sun, light, is, refract, ed, through, rain,
#       drop, s, split, ing, light, into, color, s]
#      (19 tokens, 16 unique: "s" and "light" repeat)
#
# tiktoken_evaluation compares *sets* of tokens, so duplicates count once:
#   common tokens = {sun, light, ing, through, rain, drop, s, color}   (8)
#   recall    = 8 / 16 = 0.50
#   precision = 8 / 15 ≈ 0.53
#
# The function repeats this for every reference and returns the maximum
# recall and the maximum precision found.
|