|
qid2scores_orig = qid2scores.copy() |
|
|
|
for id, parent_ids in qid2dependency.items(): |
|
# zero-out scores if parent questions are answered 'no' |
|
any_parent_answered_no = False |
|
for parent_id in parent_ids: |
|
if parent_id == 0: |
|
continue |
|
if qid2scores[parent_id] == 0: |
|
any_parent_answered_no = True |
|
break |
|
if any_parent_answered_no: |
|
qid2scores[id] = 0 |
|
qid2validity[id] = False |
|
else: |
|
qid2validity[id] = True |
|
|
|
score = sum(qid2scores.values()) / len(qid2scores) |
|
scores.append(score) |
|
average_score = sum(scores) / len(scores) |
|
with open(args.res_path, 'a') as f: |
|
f.write(image_path + ', ' + ', '.join(str(i) for i in scores) + ', ' + str(average_score) + '\n') |
|
|
|
return average_score, qid2tuple, qid2scores_orig |
Only the last image's score is used to calculate the L1 category score
ELLA/dpg_bench/compute_dpg_bench.py
Lines 138 to 161 in 3c228f1
Only the last image's score is used to calculate the L1 category score.