PyTerrier demonstration for the Vaswani collection (`vaswani`): BM25, DPH and DPH + Bo1 query expansion compared by MAP

In [1]:
import pyterrier as pt

# Initialise PyTerrier only when it has not been started yet, so this cell
# can be re-run in a live kernel without calling init() a second time.
if not pt.started():
    pt.init()

# Handle to the collection used throughout the notebook.
dataset = pt.get_dataset('vaswani')

# Parallel registries consumed by the evaluation cell:
# the retrieval pipelines to compare, and the label reported for each one.
systems, names = [], []
        
PyTerrier 0.6.0 has loaded Terrier 5.5 (built by craigmacdonald on 2021-05-20 13:12)
In [2]:
# Every pipeline in this cell runs over the same pre-built index variant of
# `dataset` (the handle created in the first cell), so neither the collection
# name nor the variant name is repeated per pipeline.
index_variant = 'terrier_stemmed'

# Two single-stage retrievers that differ only in their weighting model.
bm25_terrier_stemmed = pt.BatchRetrieve.from_dataset(dataset, index_variant, wmodel='BM25')
dph_terrier_stemmed = pt.BatchRetrieve.from_dataset(dataset, index_variant, wmodel='DPH')

# Three-stage pipeline: DPH retrieval >> Bo1 query expansion >> DPH retrieval again.
# The expansion step is given the same index that the retrievers use.
bo1_expansion = pt.rewrite.Bo1QueryExpansion(dataset.get_index(index_variant))
dph_bo1_terrier_stemmed = dph_terrier_stemmed >> bo1_expansion >> dph_terrier_stemmed

# Rebind the registries instead of appending to them: re-running this cell then
# leaves exactly three entries rather than registering every system twice.
# The lists are parallel -- names[i] is the label reported for systems[i].
systems = [bm25_terrier_stemmed, dph_terrier_stemmed, dph_bo1_terrier_stemmed]
names = ['bm25_terrier_stemmed', 'dph_terrier_stemmed', 'dph_bo1_terrier_stemmed']
In [3]:
# Compare all registered pipelines on the collection's own topics and relevance
# judgements. Both are taken from `dataset` (the handle created in the first
# cell) rather than from fresh pt.get_dataset(...) lookups, so the collection
# name is spelled out in a single place.
# The call is left as the cell's last expression so the result table is displayed.
pt.Experiment(
    systems,
    dataset.get_topics(),
    dataset.get_qrels(),
    names=names,             # labels for the rows, in the same order as `systems`
    eval_metrics=['map'],    # MAP is the only measure reported
    batch_size=200,          # NOTE(review): presumably evaluates topics in batches of 200 -- confirm in pt.Experiment docs
    drop_unused=True,        # NOTE(review): presumably drops topics that have no qrels -- confirm in pt.Experiment docs
)
        
18:40:26.652 [main] WARN  o.t.a.batchquerying.TRECQuery - trec.encoding is not set; resorting to platform default (ISO-8859-1). Retrieval may be platform dependent. Recommend trec.encoding=UTF-8
Out[3]:
name map
0 bm25_terrier_stemmed 0.296517
1 dph_terrier_stemmed 0.283621
2 dph_bo1_terrier_stemmed 0.295176