Source code for dreamtools.dream3.D3C1.scoring

"""D3C1 scoring function


Original MATLAB code by Gustavo A. Stolovitzky and Robert Prill.

"""
import numpy as np
from dreamtools.core.challenge import Challenge
import pandas as pd

__all__ = ["D3C1"]


class D3C1(Challenge):
    """D3C1 scoring function to evaluate the accuracy of a prediction

    ::

        from dreamtools import D3C1
        s = D3C1()
        filename = s.download_template()
        s.score(filename)

    """
    def __init__(self, verbose=True, download=True, **kargs):
        """Initialise the challenge and load the data used for scoring.

        :param verbose: forwarded to :class:`Challenge`
        :param download: forwarded to :class:`Challenge`
        :param kargs: extra keyword arguments forwarded to :class:`Challenge`
        """
        Challenge.__init__(self, "D3C1", verbose, download, **kargs)
        # read data required for the scoring.
        self._init()

    def _init(self):
        """Load the probability table and the gold standard.

        Nothing is loaded when ``self._standalone`` is True.
        """
        if self._standalone is True:
            return
        self._load_proba()
        self.G = self._read_challenge1_file(self.download_goldstandard())

    def download_goldstandard(self):
        """Return the path of the gold standard file.

        The path is built with ``self._pj`` from ``self.classpath``, the
        ``goldstandard`` directory and ``D3C1_goldstandard.txt``.
        """
        filename = "D3C1_goldstandard.txt"
        filename = self._pj([self.classpath, 'goldstandard', filename])
        return filename

    def score(self, filename):
        """Scoring function

        :param filename: tab-separated prediction file (4 rows, 7 columns
            once the first column is taken as the index)
        :return: tuple with first element being the number of correct
            predictions and second element being the pvalue
        """
        data = self._read_challenge1_file(filename)
        # add-up the correct predictions: cells equal to 1 in both the
        # gold standard and the submission
        hits = np.logical_and(self.G == 1, data == 1)
        num_correct = hits.sum().sum()
        pval = self.probability(num_correct)
        return num_correct, pval

    def _read_challenge1_file(self, filename):
        """Read a tab-separated file and check that it is a 4x7 table.

        :param filename: file to parse; its first column is used as the index
        :return: the parsed :class:`pandas.DataFrame`
        :raises AssertionError: if the parsed table is not 4 rows by 7 columns
        """
        d = pd.read_csv(filename, sep="\t", index_col=0)
        # Explicit raise rather than an ``assert`` statement so that the
        # check still runs under ``python -O``. AssertionError is kept so
        # that existing callers see the same exception type.
        if d.shape != (4, 7):
            raise AssertionError('Problem parsing file')
        return d

    def _load_proba(self):
        """load the probability density (really a prob mass function for
        this challenge)

        Sets ``self.X`` (number of correct predictions) and ``self.Y``
        (probability associated with each value of ``self.X``).
        """
        # There is a matlab file called data/PDF_SignalingCascadeChallenge.mat
        # but it appears to be very simple so it is hardcoded here below
        self.X = np.array([0, 1, 2, 3, 4])
        self.Y = np.array([0.55357143, 0.33809524, 0.09285714,
                           0.01428571, 0.00119048])
        # 4 proteins, 7 assignments. P(4 correct) = 1 / (7*6*5*4)
        # P3 = C^4_1, P2 = C^4_2, P1 = C^4_3, P0 = ...

    def probability(self, x):
        """Return the summed probability of all ``self.X`` values >= *x*.

        :param x: number of correct predictions
        :return: sum of ``self.Y`` over the entries where ``self.X >= x``,
            scaled by the spacing between consecutive ``self.X`` values
        """
        dx = self.X[2] - self.X[1]
        # 1.0 where X >= x, 0.0 elsewhere, so only the upper tail contributes
        tail = np.double(self.X >= x)
        return np.sum(tail * self.Y * dx)

    def download_template(self):
        """Return filename of a template to be used for testing"""
        filename = self._pj([self.classpath, 'templates', 'D3C1_template.txt'])
        return filename