From 372b2e5cc60b37622cff7b942625a749a189ea68 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Fri, 3 Feb 2023 17:28:36 +0100 Subject: [PATCH 01/20] [graph] Stub: create class for pathway topology analysis --- .../analyze/PathwayTopologyAnalaysis.java | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java new file mode 100644 index 000000000..0346d0197 --- /dev/null +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java @@ -0,0 +1,130 @@ +package fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioPathway; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality.PathBasedCentrality; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; + +import java.util.*; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Class to aggregate bioentity centrality into pathway score for Pathway Topology Analysis + */ +public class PathwayTopologyAnalaysis { + + Map<BioPathway,Collection<BioMetabolite>> kb; + CompoundGraph g; + Map<BioMetabolite,Double> data; + + public PathwayTopologyAnalaysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){ + this.kb=knowledgeBase; + this.g = topology; + this.data=compoundData; + } + + public PathwayTopologyAnalaysis(BioNetwork knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){ + this.kb=new HashMap<>(); + for(BioPathway p : knowledgeBase.getPathwaysView()){ + kb.put(p,knowledgeBase.getMetabolitesFromPathway(p)); + } + this.g = topology; + this.data=compoundData; + } + + public PathwayTopologyAnalaysis(BioNetwork knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){ + this.kb=new HashMap<>(); + for(BioPathway p : knowledgeBase.getPathwaysView()){ + kb.put(p,knowledgeBase.getMetabolitesFromPathway(p)); + } + this.g = topology; + Double defaultValue = 1.0/compoundOfInterest.size(); + this.data=compoundOfInterest.stream(). + collect(Collectors.toMap(c -> c, c -> defaultValue)); + } + public PathwayTopologyAnalaysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){ + this.kb=knowledgeBase; + this.g = topology; + Double defaultValue = 1.0/compoundOfInterest.size(); + this.data=compoundOfInterest.stream(). + collect(Collectors.toMap(c -> c, c -> defaultValue)); + } + + public Map<BioPathway,Double> runAnalysis(IndividualScoringStrategy scoring, AggregationStrategy aggregation){ + //From input data and given interaction network, compute topology score for each compound, using scoring strategy. + Map<BioMetabolite,Double> individualScore = scoring.apply(data,g); + //From knowledge base, get the pathway memberships and collect component's scores. + Map<BioPathway,Collection<Double>> pathwayScores = individualScoresByPathway(individualScore); + //Using aggregation strategy, compute for each pathway its final score from its constituents ones. + Map<BioPathway,Double> pathwayFinalScore = aggregation.apply(pathwayScores); + return pathwayFinalScore; + } + + private HashMap<BioPathway,Collection<Double>> individualScoresByPathway(Map<BioMetabolite,Double> individualScore){ + HashMap<BioPathway,Collection<Double>> pathwayScores = new HashMap<>(); + for(Map.Entry<BioPathway,Collection<BioMetabolite>> pathwayEntry : kb.entrySet()){ + List<Double> componentsScore = pathwayEntry.getValue().stream() + .filter(individualScore::containsKey) + .map(individualScore::get) + .collect(Collectors.toList()); + pathwayScores.put(pathwayEntry.getKey(),componentsScore); + } + return pathwayScores; + } + + public interface IndividualScoringStrategy extends BiFunction<Map<BioMetabolite,Double>,CompoundGraph,Map<BioMetabolite,Double>> { + + static IndividualScoringStrategy betweenness(){ + return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> { + PathBasedCentrality<BioMetabolite, ReactionEdge,CompoundGraph> centralityAnalyser = new PathBasedCentrality<>(graph); + Map<BioMetabolite, Integer> betweenness = centralityAnalyser.getGeodesicBetweenness(); + return betweenness.entrySet().stream() + .filter(e -> data.containsKey(e.getKey())) + .collect(Collectors.toMap(e -> e.getKey(), e -> Double.valueOf(data.get(e.getKey())))); + }; + } + + static IndividualScoringStrategy outDegree(){ + return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream() + .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.outDegreeOf(v)))); + } + + static IndividualScoringStrategy neighbors(){ + return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream() + .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.neighborListOf(v).size()))); + } + + } + + public interface AggregationStrategy extends Function<Map<BioPathway,Collection<Double>>,Map<BioPathway,Double>> { + static AggregationStrategy rawSum(){ + return (Map<BioPathway,Collection<Double>> pathwayScores) -> + { + Map<BioPathway,Double> pathwayFinalScore = new HashMap<>(); + for(Map.Entry<BioPathway,Collection<Double>> e : pathwayScores.entrySet()){ + Double finalScoring = 0.0; + for(Double score : e.getValue()){ + finalScoring+=score; + } + pathwayFinalScore.put(e.getKey(),finalScoring); + } + return pathwayFinalScore; + }; + } + + + static AggregationStrategy normalizedSum(){ + return (Map<BioPathway,Collection<Double>> pathwayScores) -> + { + Map<BioPathway,Double> pathwayRawScore = rawSum().apply(pathwayScores); + Double sum = pathwayRawScore.values().stream().reduce(0.0, (x,y) -> x+y); + return pathwayRawScore.entrySet().stream() + .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()/sum)); + }; + } + } +} -- GitLab From cc50281e255d9602bab92145aec25683e35c1855 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Tue, 7 Feb 2023 14:56:17 +0100 Subject: [PATCH 02/20] add javadoc --- ...ysis.java => PathwayTopologyAnalysis.java} | 79 ++++++++++++++++--- 1 file changed, 68 insertions(+), 11 deletions(-) rename met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/{PathwayTopologyAnalaysis.java => PathwayTopologyAnalysis.java} (59%) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java similarity index 59% rename from met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java rename to met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java index 0346d0197..6942c5364 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java @@ -13,21 +13,33 @@ import java.util.function.Function; import java.util.stream.Collectors; /** - * Class to aggregate bioentity centrality into pathway score for Pathway Topology Analysis + * Class to aggregate metabolites' centrality into pathway score for Pathway Topology Analysis */ -public class PathwayTopologyAnalaysis { - - Map<BioPathway,Collection<BioMetabolite>> kb; - CompoundGraph g; - Map<BioMetabolite,Double> data; - - public PathwayTopologyAnalaysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){ +public class PathwayTopologyAnalysis { + + private Map<BioPathway,Collection<BioMetabolite>> kb; + private CompoundGraph g; + private Map<BioMetabolite,Double> data; + + /** + * Instantiate class to perform Pathway Topology Analysis + * @param knowledgeBase Map affiliating metabolites to pathways + * @param topology CompoundGraph storing metabolites' relationships + * @param compoundData Map containing input data (metabolites abundance for example) + */ + public PathwayTopologyAnalysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){ this.kb=knowledgeBase; this.g = topology; this.data=compoundData; } - public PathwayTopologyAnalaysis(BioNetwork knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){ + /** + * Instantiate class to perform Pathway Topology Analysis + * @param knowledgeBase BioNetwork affiliating metabolites to pathways + * @param topology CompoundGraph storing metabolites' relationships + * @param compoundData Map containing input data (metabolites abundance for example) + */ + public PathwayTopologyAnalysis(BioNetwork knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){ this.kb=new HashMap<>(); for(BioPathway p : knowledgeBase.getPathwaysView()){ kb.put(p,knowledgeBase.getMetabolitesFromPathway(p)); @@ -36,7 +48,13 @@ public class PathwayTopologyAnalaysis { this.data=compoundData; } - public PathwayTopologyAnalaysis(BioNetwork knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){ + /** + * Instantiate class to perform Pathway Topology Analysis + * @param knowledgeBase BioNetwork affiliating metabolites to pathways + * @param topology CompoundGraph storing metabolites' relationships + * @param compoundOfInterest Set containing input data (significantly overrepresented metabolites for example) + */ + public PathwayTopologyAnalysis(BioNetwork knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){ this.kb=new HashMap<>(); for(BioPathway p : knowledgeBase.getPathwaysView()){ kb.put(p,knowledgeBase.getMetabolitesFromPathway(p)); @@ -46,7 +64,13 @@ public class PathwayTopologyAnalaysis { this.data=compoundOfInterest.stream(). collect(Collectors.toMap(c -> c, c -> defaultValue)); } - public PathwayTopologyAnalaysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){ + /** + * Instantiate class to perform Pathway Topology Analysis + * @param knowledgeBase Map affiliating metabolites to pathways + * @param topology CompoundGraph storing metabolites' relationships + * @param compoundOfInterest Set containing input data (significantly overrepresented metabolites for example) + */ + public PathwayTopologyAnalysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){ this.kb=knowledgeBase; this.g = topology; Double defaultValue = 1.0/compoundOfInterest.size(); @@ -54,6 +78,12 @@ public class PathwayTopologyAnalaysis { collect(Collectors.toMap(c -> c, c -> defaultValue)); } + /** + * Method to compute Pathway Impact from data, according to a given strategy for individual scoring and aggregation + * @param scoring an IndividualScoringStrategy for scoring metabolites (centrality measure) + * @param aggregation an AggregationStrategy for aggregating component scores into pathway score. + * @return a Map of Pathways and their score + */ public Map<BioPathway,Double> runAnalysis(IndividualScoringStrategy scoring, AggregationStrategy aggregation){ //From input data and given interaction network, compute topology score for each compound, using scoring strategy. Map<BioMetabolite,Double> individualScore = scoring.apply(data,g); @@ -64,6 +94,9 @@ public class PathwayTopologyAnalaysis { return pathwayFinalScore; } + /* + From associated compound given by knowledge base, retrieve list of components' scores for each pathway + */ private HashMap<BioPathway,Collection<Double>> individualScoresByPathway(Map<BioMetabolite,Double> individualScore){ HashMap<BioPathway,Collection<Double>> pathwayScores = new HashMap<>(); for(Map.Entry<BioPathway,Collection<BioMetabolite>> pathwayEntry : kb.entrySet()){ @@ -76,8 +109,15 @@ public class PathwayTopologyAnalaysis { return pathwayScores; } + /** + * Interface for individual scoring strategy, computing metabolites impact + */ public interface IndividualScoringStrategy extends BiFunction<Map<BioMetabolite,Double>,CompoundGraph,Map<BioMetabolite,Double>> { + /** + * Use betweenness as scoring function, i.e. the number of shortest paths passing through a given node (excluding paths where it is the starting or ending node). + * @return a map of compounds and their respective impact score. + */ static IndividualScoringStrategy betweenness(){ return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> { PathBasedCentrality<BioMetabolite, ReactionEdge,CompoundGraph> centralityAnalyser = new PathBasedCentrality<>(graph); @@ -88,11 +128,20 @@ public class PathwayTopologyAnalaysis { }; } + /** + * Use out degree as scoring function, i.e. the number of outgoing edges of a node. + * @return a map of compounds and their respective impact score. + */ static IndividualScoringStrategy outDegree(){ return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream() .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.outDegreeOf(v)))); } + /** + * Use number of neighbors as scoring function. Contrary to degree, this is not impacted by parallel edges + * (same pairs of nodes connected by different edges corresponding to different reactions) + * @return a map of compounds and their respective impact score. + */ static IndividualScoringStrategy neighbors(){ return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream() .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.neighborListOf(v).size()))); @@ -100,7 +149,15 @@ public class PathwayTopologyAnalaysis { } + /** + * Interface for aggregation strategy, computing pathway impact from constituting compounds' impact + */ public interface AggregationStrategy extends Function<Map<BioPathway,Collection<Double>>,Map<BioPathway,Double>> { + + /** + * Simply count the sum of compounds of interest scores as the final pathway score + * @return + */ static AggregationStrategy rawSum(){ return (Map<BioPathway,Collection<Double>> pathwayScores) -> { -- GitLab From 5c1f9fbaa4513f51ea3710f322b8a4b0d48a59be Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Tue, 7 Feb 2023 17:32:00 +0100 Subject: [PATCH 03/20] add PageRank as individual scoring strategy. Also simplified pagerank computation in EigenvectorCentrality class --- .../analyze/PathwayTopologyAnalysis.java | 29 +++++++++++++++++- .../centrality/EigenVectorCentrality.java | 30 +++++++++++-------- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java index 6942c5364..bcc67669f 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java @@ -3,6 +3,7 @@ package fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze; import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork; import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioPathway; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality.EigenVectorCentrality; import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality.PathBasedCentrality; import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; @@ -115,7 +116,7 @@ public class PathwayTopologyAnalysis { public interface IndividualScoringStrategy extends BiFunction<Map<BioMetabolite,Double>,CompoundGraph,Map<BioMetabolite,Double>> { /** - * Use betweenness as scoring function, i.e. the number of shortest paths passing through a given node (excluding paths where it is the starting or ending node). + * Use betweenness as scoring function, i.e. the proportion of shortest paths passing through a given node (excluding paths where it is the starting or ending node). * @return a map of compounds and their respective impact score. */ static IndividualScoringStrategy betweenness(){ @@ -128,6 +129,32 @@ public class PathwayTopologyAnalysis { }; } + /** + * Use PageRank as scoring function, a centrality measure that represents the likelihood that a random walk reach a particular node. + * This is run with default damping factor 0.85, using power iteration approximation with 15000 max iterations and 0.001 tolerance for convergence + * @return a map of compounds and their respective impact score. + */ + static IndividualScoringStrategy pageRank(){ + return IndividualScoringStrategy.pageRank(0.85,15000,0.001); + } + + /** + * Use PageRank as scoring function, a centrality measure that represents the likelihood that a random walk reach a particular node + * @param dampingFactor damping factor + * @param maxIter maximal number of iteration of the power method + * @param tolerance convergence tolerance + * @return a map of compounds and their respective impact score. + */ + static IndividualScoringStrategy pageRank(Double dampingFactor, int maxIter, double tolerance){ + return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> { + EigenVectorCentrality<BioMetabolite, ReactionEdge,CompoundGraph> centralityAnalyser = new EigenVectorCentrality<>(graph); + centralityAnalyser.addJumpProb(graph.vertexSet().stream() + .map(BioMetabolite::getId).collect(Collectors.toSet()), dampingFactor); + Map<String, Double> pageRank = centralityAnalyser.computePowerMethodPageRank(dampingFactor,maxIter,tolerance); + return pageRank.entrySet().stream().collect(Collectors.toMap(e -> graph.getVertex(e.getKey()), e -> e.getValue())); + }; + } + /** * Use out degree as scoring function, i.e. the number of outgoing edges of a node. * @return a map of compounds and their respective impact score. diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/centrality/EigenVectorCentrality.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/centrality/EigenVectorCentrality.java index d3f0a32c5..d1893e22c 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/centrality/EigenVectorCentrality.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/centrality/EigenVectorCentrality.java @@ -35,19 +35,15 @@ */ package fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioEntity; import fr.inrae.toulouse.metexplore.met4j_graph.computation.utils.ComputeAdjacencyMatrix; import fr.inrae.toulouse.metexplore.met4j_graph.core.BioGraph; import fr.inrae.toulouse.metexplore.met4j_graph.core.Edge; import fr.inrae.toulouse.metexplore.met4j_mathUtils.matrix.BioMatrix; import fr.inrae.toulouse.metexplore.met4j_mathUtils.matrix.EjmlMatrix; -import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioEntity; + +import java.util.*; +import java.util.stream.Collectors; /** * Class to compute the eigen vector centrality of each vertex in a BioGraph. @@ -134,17 +130,27 @@ public class EigenVectorCentrality<V extends BioEntity, E extends Edge<V>, G ext /** * Gets a map with all nodes id as key and global page rank * - * @param d the damping factor + * @param dampingFactor the damping factor * @return the map with node identifier and corresponding centrality */ - public HashMap<String, Double> computePageRank(double d){ + public HashMap<String, Double> computePageRank(double dampingFactor){ BioMatrix tmp = adjacencyMatrix.copy(); - addJumpProb(adjacencyMatrix.getRowLabelMap().keySet(),d); + addJumpProb(adjacencyMatrix.getRowLabelMap().keySet(),1-dampingFactor); HashMap<String, Double> result = computeEigenVectorCentrality(); adjacencyMatrix = tmp; return result; } + public HashMap<String, Double> computePowerMethodPageRank(double dampingFactor, int maxNbOfIter, double tolerance){ + BioMatrix tmp = adjacencyMatrix.copy(); + Set<String> allNodes = adjacencyMatrix.getRowLabelMap().keySet(); + addJumpProb(allNodes,1 - dampingFactor); + Map<String, Double> seeds = allNodes.stream().collect(Collectors.toMap(k -> k, k -> 1.0 / allNodes.size())); + + HashMap<String, Double> result = powerIteration(seeds, maxNbOfIter, tolerance); + adjacencyMatrix = tmp; + return result; + } /** * add a constant probability to "jump" (i.e. go to another node without necessarily following an edge) to defined set of node @@ -329,7 +335,7 @@ public class EigenVectorCentrality<V extends BioEntity, E extends Edge<V>, G ext * @param tol the tolerance, if the max delta between two iteration is below this value, the result is returned * @return map with node id as key and eigen vector centrality as value */ - public HashMap<String, Double> powerIteration(HashMap<String, Double> seeds, int maxIter, double tol){ + public HashMap<String, Double> powerIteration(Map<String, Double> seeds, int maxIter, double tol){ BioMatrix rank = new EjmlMatrix(1, adjacencyMatrix.numCols()); for(Map.Entry<String,Integer> entry : adjacencyMatrix.getRowLabelMap().entrySet()){ String e = entry.getKey(); -- GitLab From b6b2d1b76e0b32f92abb3cc23f145157fa15b457 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Wed, 8 Feb 2023 17:29:01 +0100 Subject: [PATCH 04/20] add normalization --- .../analyze/PathwayTopologyAnalysis.java | 55 ++++++++++++++----- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java index bcc67669f..2d7f37017 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java @@ -12,6 +12,7 @@ import java.util.*; import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Class to aggregate metabolites' centrality into pathway score for Pathway Topology Analysis @@ -22,6 +23,8 @@ public class PathwayTopologyAnalysis { private CompoundGraph g; private Map<BioMetabolite,Double> data; + private boolean normalize; + /** * Instantiate class to perform Pathway Topology Analysis * @param knowledgeBase Map affiliating metabolites to pathways @@ -65,6 +68,7 @@ public class PathwayTopologyAnalysis { this.data=compoundOfInterest.stream(). collect(Collectors.toMap(c -> c, c -> defaultValue)); } + /** * Instantiate class to perform Pathway Topology Analysis * @param knowledgeBase Map affiliating metabolites to pathways @@ -79,14 +83,48 @@ public class PathwayTopologyAnalysis { collect(Collectors.toMap(c -> c, c -> defaultValue)); } + /** + * Use normalized score, using the ratio between the raw pathway score and the maximum score by pathway + * @return a PathwayTopologyAnalysis object + */ + public PathwayTopologyAnalysis useNormalization(){ + this.normalize=true; + return this; + } + /** * Method to compute Pathway Impact from data, according to a given strategy for individual scoring and aggregation * @param scoring an IndividualScoringStrategy for scoring metabolites (centrality measure) * @param aggregation an AggregationStrategy for aggregating component scores into pathway score. * @return a Map of Pathways and their score */ - public Map<BioPathway,Double> runAnalysis(IndividualScoringStrategy scoring, AggregationStrategy aggregation){ - //From input data and given interaction network, compute topology score for each compound, using scoring strategy. + public Map<BioPathway,Double> run(IndividualScoringStrategy scoring, AggregationStrategy aggregation){ + Map<BioPathway,Double> pathwayFinalScore = computePathwayScore(data, g, scoring, aggregation); + if(normalize){ + //create background data (i.e dataset with all compounds) + Set<BioMetabolite> background = kb.values().stream().flatMap(Collection::stream) + .collect(Collectors.toSet()); + Map<BioMetabolite, Double> backgroundDefaultValues = background + .stream().collect(Collectors.toMap(k -> k, k-> 1.0/background.size())); + + //compute pathway score as if all their compounds were in dataset (recompute individual score for all compounds) + Map<BioPathway,Double> byPathwayBackgroundScore = computePathwayScore(backgroundDefaultValues, g, scoring, aggregation); + + //set final pathway score as ratio between pathway score computed from data and theoretical maximal pathway score + pathwayFinalScore = Stream.concat(pathwayFinalScore.entrySet().stream(), byPathwayBackgroundScore.entrySet().stream()) + .collect(Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (value1, value2) -> value1/value2)); + } + return pathwayFinalScore; + } + + /* + Compute topology pathway analysis + */ + private Map<BioPathway,Double> computePathwayScore(Map<BioMetabolite, Double> data, CompoundGraph g, IndividualScoringStrategy scoring, AggregationStrategy aggregation){ + //From compounds and given interaction network, compute topology score for each compound, using scoring strategy. Map<BioMetabolite,Double> individualScore = scoring.apply(data,g); //From knowledge base, get the pathway memberships and collect component's scores. Map<BioPathway,Collection<Double>> pathwayScores = individualScoresByPathway(individualScore); @@ -98,7 +136,7 @@ public class PathwayTopologyAnalysis { /* From associated compound given by knowledge base, retrieve list of components' scores for each pathway */ - private HashMap<BioPathway,Collection<Double>> individualScoresByPathway(Map<BioMetabolite,Double> individualScore){ + private Map<BioPathway,Collection<Double>> individualScoresByPathway(Map<BioMetabolite,Double> individualScore){ HashMap<BioPathway,Collection<Double>> pathwayScores = new HashMap<>(); for(Map.Entry<BioPathway,Collection<BioMetabolite>> pathwayEntry : kb.entrySet()){ List<Double> componentsScore = pathwayEntry.getValue().stream() @@ -199,16 +237,5 @@ public class PathwayTopologyAnalysis { return pathwayFinalScore; }; } - - - static AggregationStrategy normalizedSum(){ - return (Map<BioPathway,Collection<Double>> pathwayScores) -> - { - Map<BioPathway,Double> pathwayRawScore = rawSum().apply(pathwayScores); - Double sum = pathwayRawScore.values().stream().reduce(0.0, (x,y) -> x+y); - return pathwayRawScore.entrySet().stream() - .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()/sum)); - }; - } } } -- GitLab From 1847d29afa0f477e5e2a3f0ee5e47bf69aa7812c Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 6 Jul 2023 17:01:15 +0200 Subject: [PATCH 05/20] fix betweenness scoring --- .../computation/analyze/PathwayTopologyAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java index 2d7f37017..7f1f44007 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java @@ -163,7 +163,7 @@ public class PathwayTopologyAnalysis { Map<BioMetabolite, Integer> betweenness = centralityAnalyser.getGeodesicBetweenness(); return betweenness.entrySet().stream() .filter(e -> data.containsKey(e.getKey())) - .collect(Collectors.toMap(e -> e.getKey(), e -> Double.valueOf(data.get(e.getKey())))); + .collect(Collectors.toMap(e -> e.getKey(), e -> Double.valueOf(betweenness.get(e.getKey())))); }; } -- GitLab From 28716e86e7aa1d57bf8886d93b2ba84174cadb89 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 6 Jul 2023 17:07:58 +0200 Subject: [PATCH 06/20] REMOVE DATA WEIGHT --- .../analyze/PathwayTopologyAnalysis.java | 59 ++++--------------- 1 file changed, 13 insertions(+), 46 deletions(-) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java index 7f1f44007..ef4dd399b 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java @@ -21,7 +21,7 @@ public class PathwayTopologyAnalysis { private Map<BioPathway,Collection<BioMetabolite>> kb; private CompoundGraph g; - private Map<BioMetabolite,Double> data; + private Set<BioMetabolite> data; private boolean normalize; @@ -29,27 +29,12 @@ public class PathwayTopologyAnalysis { * Instantiate class to perform Pathway Topology Analysis * @param knowledgeBase Map affiliating metabolites to pathways * @param topology CompoundGraph storing metabolites' relationships - * @param compoundData Map containing input data (metabolites abundance for example) + * @param compoundOfInterest Set containing input data (significantly overrepresented metabolites for example) */ - public PathwayTopologyAnalysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){ + public PathwayTopologyAnalysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){ this.kb=knowledgeBase; this.g = topology; - this.data=compoundData; - } - - /** - * Instantiate class to perform Pathway Topology Analysis - * @param knowledgeBase BioNetwork affiliating metabolites to pathways - * @param topology CompoundGraph storing metabolites' relationships - * @param compoundData Map containing input data (metabolites abundance for example) - */ - public PathwayTopologyAnalysis(BioNetwork knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){ - this.kb=new HashMap<>(); - for(BioPathway p : knowledgeBase.getPathwaysView()){ - kb.put(p,knowledgeBase.getMetabolitesFromPathway(p)); - } - this.g = topology; - this.data=compoundData; + this.data=compoundOfInterest; } /** @@ -64,23 +49,7 @@ public class PathwayTopologyAnalysis { kb.put(p,knowledgeBase.getMetabolitesFromPathway(p)); } this.g = topology; - Double defaultValue = 1.0/compoundOfInterest.size(); - this.data=compoundOfInterest.stream(). - collect(Collectors.toMap(c -> c, c -> defaultValue)); - } - - /** - * Instantiate class to perform Pathway Topology Analysis - * @param knowledgeBase Map affiliating metabolites to pathways - * @param topology CompoundGraph storing metabolites' relationships - * @param compoundOfInterest Set containing input data (significantly overrepresented metabolites for example) - */ - public PathwayTopologyAnalysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){ - this.kb=knowledgeBase; - this.g = topology; - Double defaultValue = 1.0/compoundOfInterest.size(); - this.data=compoundOfInterest.stream(). - collect(Collectors.toMap(c -> c, c -> defaultValue)); + this.data=compoundOfInterest; } /** @@ -104,11 +73,9 @@ public class PathwayTopologyAnalysis { //create background data (i.e dataset with all compounds) Set<BioMetabolite> background = kb.values().stream().flatMap(Collection::stream) .collect(Collectors.toSet()); - Map<BioMetabolite, Double> backgroundDefaultValues = background - .stream().collect(Collectors.toMap(k -> k, k-> 1.0/background.size())); //compute pathway score as if all their compounds were in dataset (recompute individual score for all compounds) - Map<BioPathway,Double> byPathwayBackgroundScore = computePathwayScore(backgroundDefaultValues, g, scoring, aggregation); + Map<BioPathway,Double> byPathwayBackgroundScore = computePathwayScore(background, g, scoring, aggregation); //set final pathway score as ratio between pathway score computed from data and theoretical maximal pathway score pathwayFinalScore = Stream.concat(pathwayFinalScore.entrySet().stream(), byPathwayBackgroundScore.entrySet().stream()) @@ -123,7 +90,7 @@ public class PathwayTopologyAnalysis { /* Compute topology pathway analysis */ - private Map<BioPathway,Double> computePathwayScore(Map<BioMetabolite, Double> data, CompoundGraph g, IndividualScoringStrategy scoring, AggregationStrategy aggregation){ + private Map<BioPathway,Double> computePathwayScore(Set<BioMetabolite> data, CompoundGraph g, IndividualScoringStrategy scoring, AggregationStrategy aggregation){ //From compounds and given interaction network, compute topology score for each compound, using scoring strategy. Map<BioMetabolite,Double> individualScore = scoring.apply(data,g); //From knowledge base, get the pathway memberships and collect component's scores. @@ -151,18 +118,18 @@ public class PathwayTopologyAnalysis { /** * Interface for individual scoring strategy, computing metabolites impact */ - public interface IndividualScoringStrategy extends BiFunction<Map<BioMetabolite,Double>,CompoundGraph,Map<BioMetabolite,Double>> { + public interface IndividualScoringStrategy extends BiFunction<Set<BioMetabolite>,CompoundGraph,Map<BioMetabolite,Double>> { /** * Use betweenness as scoring function, i.e. the proportion of shortest paths passing through a given node (excluding paths where it is the starting or ending node). * @return a map of compounds and their respective impact score. */ static IndividualScoringStrategy betweenness(){ - return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> { + return (Set<BioMetabolite> data,CompoundGraph graph) -> { PathBasedCentrality<BioMetabolite, ReactionEdge,CompoundGraph> centralityAnalyser = new PathBasedCentrality<>(graph); Map<BioMetabolite, Integer> betweenness = centralityAnalyser.getGeodesicBetweenness(); return betweenness.entrySet().stream() - .filter(e -> data.containsKey(e.getKey())) + .filter(e -> data.contains(e.getKey())) .collect(Collectors.toMap(e -> e.getKey(), e -> Double.valueOf(betweenness.get(e.getKey())))); }; } @@ -184,7 +151,7 @@ public class PathwayTopologyAnalysis { * @return a map of compounds and their respective impact score. */ static IndividualScoringStrategy pageRank(Double dampingFactor, int maxIter, double tolerance){ - return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> { + return (Set<BioMetabolite> data,CompoundGraph graph) -> { EigenVectorCentrality<BioMetabolite, ReactionEdge,CompoundGraph> centralityAnalyser = new EigenVectorCentrality<>(graph); centralityAnalyser.addJumpProb(graph.vertexSet().stream() .map(BioMetabolite::getId).collect(Collectors.toSet()), dampingFactor); @@ -198,7 +165,7 @@ public class PathwayTopologyAnalysis { * @return a map of compounds and their respective impact score. */ static IndividualScoringStrategy outDegree(){ - return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream() + return (Set<BioMetabolite> data,CompoundGraph graph) -> data.stream() .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.outDegreeOf(v)))); } @@ -208,7 +175,7 @@ public class PathwayTopologyAnalysis { * @return a map of compounds and their respective impact score. */ static IndividualScoringStrategy neighbors(){ - return (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream() + return (Set<BioMetabolite> data,CompoundGraph graph) -> data.stream() .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.neighborListOf(v).size()))); } -- GitLab From c5d9d69c4cc6ace3a01661928cb2966576a5f7a4 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 6 Jul 2023 17:46:18 +0200 Subject: [PATCH 07/20] add filter to pagerank individual score (otherwise return whole score map) --- .../computation/analyze/PathwayTopologyAnalysis.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java index ef4dd399b..15455dc73 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java @@ -156,7 +156,9 @@ public class PathwayTopologyAnalysis { centralityAnalyser.addJumpProb(graph.vertexSet().stream() .map(BioMetabolite::getId).collect(Collectors.toSet()), dampingFactor); Map<String, Double> pageRank = centralityAnalyser.computePowerMethodPageRank(dampingFactor,maxIter,tolerance); - return pageRank.entrySet().stream().collect(Collectors.toMap(e -> graph.getVertex(e.getKey()), e -> e.getValue())); + return pageRank.entrySet().stream() + .filter(e -> data.stream().map(BioMetabolite::getId).collect(Collectors.toSet()).contains(e.getKey())) + .collect(Collectors.toMap(e -> graph.getVertex(e.getKey()), e -> e.getValue())); }; } -- GitLab From 50487540f5c8106403ba43734d444ca5d6388ef0 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 6 Jul 2023 17:46:35 +0200 Subject: [PATCH 08/20] add unit tests d --- .../TestPathwayTopologyAnalysis.java | 266 ++++++++++++++++++ 1 file changed, 266 insertions(+) create mode 100644 met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java diff --git a/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java new file mode 100644 index 000000000..c84d30850 --- /dev/null +++ b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java @@ -0,0 +1,266 @@ +package fr.inrae.toulouse.metexplore.met4j_graph; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.*; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.PathwayTopologyAnalysis; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality.EigenVectorCentrality; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality.PathBasedCentrality; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +public class TestPathwayTopologyAnalysis { + + public static CompoundGraph toyGraph; + public static BioNetwork toyNetwork; + public static BioMetabolite a, b, c, d, e, f, h, g; + public static BioReaction r1,r2,r3,r4,r5,r6,r7,r8; + public static BioPathway x,y,z; + public static BioCompartment na; + public static PathBasedCentrality<BioMetabolite, ReactionEdge, CompoundGraph> toyMeasure; + + @BeforeClass + public static void init() { + + a = new BioMetabolite("a"); + b = new BioMetabolite("b"); + c = new BioMetabolite("c"); + d = new BioMetabolite("d"); + e = new BioMetabolite("e"); + f = new BioMetabolite("f"); + h = new BioMetabolite("h"); + g = new BioMetabolite("g"); + + r1 = new BioReaction("r1");r1.setReversible(true); + r2 = new BioReaction("r2");r2.setReversible(true); + r3 = new BioReaction("r3");r3.setReversible(true); + r4 = new BioReaction("r4");r4.setReversible(true); + r5 = new BioReaction("r5");r5.setReversible(true); + r6 = new BioReaction("r6");r6.setReversible(true); + r7 = new BioReaction("r7");r7.setReversible(true); + r8 = new BioReaction("r8");r8.setReversible(true); + + x = new BioPathway("x"); + y = new BioPathway("y"); + z = new BioPathway("z"); + + na = new BioCompartment("NA"); + + ReactionEdge ab = new ReactionEdge(a, b, r1); + ReactionEdge ba = new ReactionEdge(b, a, r1); + ReactionEdge bc = new ReactionEdge(b, c, r2); + ReactionEdge cb = new ReactionEdge(c, b, r2); + ReactionEdge cd = new ReactionEdge(c, d, r3); + ReactionEdge dc = new ReactionEdge(d, c, r3); + ReactionEdge de = new ReactionEdge(d, e, r4); + ReactionEdge ed = new ReactionEdge(e, d, r4); + ReactionEdge ec = new ReactionEdge(e, c, r5); + ReactionEdge ce = new ReactionEdge(c, e, r5); + ReactionEdge af = new ReactionEdge(a, f, r6); + ReactionEdge fa = new ReactionEdge(f, a, r6); + ReactionEdge fg = new ReactionEdge(f, g, r7); + ReactionEdge gf = new ReactionEdge(g, f, r7); + ReactionEdge ga = new ReactionEdge(g, a, r8); + ReactionEdge ag = new ReactionEdge(a, g, r8); + + toyNetwork=new BioNetwork(); + toyNetwork.add(a,b,c,d,e,f,g); + toyNetwork.add(r1,r2,r3,r4,r5,r6,r7,r8); + toyNetwork.add(x,y,z); + toyNetwork.add(na); + + toyNetwork.affectToCompartment(na,a,b,c,d,e,f,g); + + toyNetwork.affectLeft(r1,1.0,na,a); + toyNetwork.affectRight(r1,1.0,na,b); + toyNetwork.affectLeft(r2,1.0,na,b); + toyNetwork.affectRight(r2,1.0,na,c); + toyNetwork.affectLeft(r3,1.0,na,c); + toyNetwork.affectRight(r3,1.0,na,d); + toyNetwork.affectLeft(r4,1.0,na,d); + toyNetwork.affectRight(r4,1.0,na,e); + toyNetwork.affectLeft(r5,1.0,na,e); + toyNetwork.affectRight(r5,1.0,na,c); + toyNetwork.affectLeft(r6,1.0,na,a); + toyNetwork.affectRight(r6,1.0,na,f); + toyNetwork.affectLeft(r7,1.0,na,f); + toyNetwork.affectRight(r7,1.0,na,g); + toyNetwork.affectLeft(r8,1.0,na,g); + toyNetwork.affectRight(r8,1.0,na,a); + + toyNetwork.affectToPathway(x,r1,r6,r7,r8); + toyNetwork.affectToPathway(y,r2); + toyNetwork.affectToPathway(z,r3,r4,r5); + + toyGraph = new CompoundGraph(); + toyGraph.addVertex(a); + toyGraph.addVertex(b); + toyGraph.addVertex(c); + toyGraph.addVertex(d); + toyGraph.addVertex(e); + toyGraph.addVertex(f); + toyGraph.addVertex(g); + toyGraph.addEdge(a, b, ab); + toyGraph.addEdge(b, c, bc); + toyGraph.addEdge(c, d, cd); + toyGraph.addEdge(d, e, de); + toyGraph.addEdge(e, c, ec); + toyGraph.addEdge(a, f, af); + toyGraph.addEdge(f, g, fg); + toyGraph.addEdge(g, a, ga); + toyGraph.addEdge(b, a, ba); + toyGraph.addEdge(c, b, cb); + toyGraph.addEdge(d, c, dc); + toyGraph.addEdge(e, d, ed); + toyGraph.addEdge(c, e, ce); + toyGraph.addEdge(f, a, fa); + toyGraph.addEdge(g, f, gf); + toyGraph.addEdge(a, g, ag); + + toyMeasure = new PathBasedCentrality<>(toyGraph); + } + + @Test + public void testBetweenness() { + + HashSet<BioMetabolite> noi = new HashSet<>(); + noi.add(a); + noi.add(b); + noi.add(e); + PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi); + + Map<BioMetabolite, Integer> toyBetweenness = toyMeasure.getGeodesicBetweenness(); + + Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.betweenness(), + PathwayTopologyAnalysis.AggregationStrategy.rawSum()); + + assertEquals(toyBetweenness.get(a)+toyBetweenness.get(b), res.get(x), 0.00000001); + assertEquals(toyBetweenness.get(b), res.get(y), 0.00000001); + assertEquals(toyBetweenness.get(e), res.get(z), 0.00000001); + } + + @Test + public void testNormalization() { + + HashSet<BioMetabolite> noi = new HashSet<>(); + noi.add(a); + noi.add(b); + noi.add(e); + PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi).useNormalization(); + + Map<BioMetabolite, Integer> toyBetweenness = toyMeasure.getGeodesicBetweenness(); + + Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.betweenness(), + PathwayTopologyAnalysis.AggregationStrategy.rawSum()); + + + assertEquals(Double.valueOf(toyBetweenness.get(a)+toyBetweenness.get(b)) / Double.valueOf(toyBetweenness.get(a)+toyBetweenness.get(b)+toyBetweenness.get(f)+toyBetweenness.get(g)), res.get(x), 0.00000001); + assertEquals(Double.valueOf(toyBetweenness.get(b)) / Double.valueOf(toyBetweenness.get(b)+toyBetweenness.get(c)), res.get(y), 0.00000001); + assertEquals(Double.valueOf(toyBetweenness.get(e)) / Double.valueOf(toyBetweenness.get(c)+toyBetweenness.get(d)+toyBetweenness.get(e)), res.get(z), 0.00000001); + } + + @Test + public void testBetweennessII() { + + HashSet<BioMetabolite> noi = new HashSet<>(); + noi.add(a); + noi.add(b); + PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi); + + Map<BioMetabolite, Integer> toyBetweenness = toyMeasure.getGeodesicBetweenness(); + + Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.betweenness(), + PathwayTopologyAnalysis.AggregationStrategy.rawSum()); + + assertEquals(toyBetweenness.get(a)+toyBetweenness.get(b), res.get(x), 0.00000001); + assertEquals(toyBetweenness.get(b), res.get(y), 0.00000001); + assertEquals(0, res.get(z), 0.00000001); + } + + @Test + public void testOutDegree() { + + HashSet<BioMetabolite> noi = new HashSet<>(); + noi.add(a); + noi.add(b); + noi.add(e); + PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi); + + Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.outDegree(), + PathwayTopologyAnalysis.AggregationStrategy.rawSum()); + + assertEquals(5, res.get(x), 0.00000001); + assertEquals(2, res.get(y), 0.00000001); + assertEquals(2,res.get(z), 0.00000001); + } + + @Test + public void testMapConstructor() { + + HashSet<BioMetabolite> noi = new HashSet<>(); + noi.add(a); + noi.add(b); + noi.add(e); + HashMap<BioPathway, Collection<BioMetabolite>> kb = new HashMap<>(); + BioCollection<BioMetabolite> xCpds = new BioCollection<>();xCpds.add(g,f,a,b);kb.put(x,xCpds); + BioCollection<BioMetabolite> yCpds = new BioCollection<>();yCpds.add(b,c);kb.put(y,yCpds); + BioCollection<BioMetabolite> zCpds = new BioCollection<>();zCpds.add(c,e,d);kb.put(z,zCpds); + + + PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(kb,toyGraph,noi); + + Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.outDegree(), + PathwayTopologyAnalysis.AggregationStrategy.rawSum()); + + assertEquals(5, res.get(x), 0.00000001); + assertEquals(2, res.get(y), 0.00000001); + assertEquals(2,res.get(z), 0.00000001); + } + + @Test + public void testNeighbours() { + + HashSet<BioMetabolite> noi = new HashSet<>(); + noi.add(a); + noi.add(b); + noi.add(e); + PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi); + + Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.neighbors(), + PathwayTopologyAnalysis.AggregationStrategy.rawSum()); + + assertEquals(5, res.get(x), 0.00000001); + assertEquals(2, res.get(y), 0.00000001); + assertEquals(2,res.get(z), 0.00000001); + } + + @Test + public void testPageRank() { + + + HashSet<BioMetabolite> noi = new HashSet<>(); + noi.add(a); + noi.add(b); + noi.add(e); + PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi); + + EigenVectorCentrality<BioMetabolite,ReactionEdge,CompoundGraph> toyMeasure2 = new EigenVectorCentrality<>(toyGraph); + HashMap<String, Double> toyPageRank = toyMeasure2.computePowerMethodPageRank(0.85,15000,0.001); + + Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.pageRank(0.85,15000,0.001), + PathwayTopologyAnalysis.AggregationStrategy.rawSum()); + + assertEquals(toyPageRank.get(a.getId())+toyPageRank.get(b.getId()), res.get(x), 0.00000001); + assertEquals(toyPageRank.get(b.getId()), res.get(y), 0.00000001); + assertEquals(toyPageRank.get(e.getId()), res.get(z), 0.00000001); + + } +} -- GitLab From c03abad5b906823a01dcdf10c66f0e031bd75621 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Thu, 6 Jul 2023 18:07:51 +0200 Subject: [PATCH 09/20] Stub TPA app --- .../TopologicalPathwayAnalysis.java | 227 ++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java new file mode 100644 index 000000000..c5034a7b3 --- /dev/null +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -0,0 +1,227 @@ +package fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioPathway; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.PathwayTopologyAnalysis; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.*; +import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.VertexContraction; +import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge; +import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph; +import fr.inrae.toulouse.metexplore.met4j_graph.io.NodeMapping; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.FBCParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.GroupPathwayParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.NotesParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.PackageParser; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumFormats; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumParameterTypes; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.Format; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.ParameterType; +import org.kohsuke.args4j.Option; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Map; + +public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { + + @Format(name= EnumFormats.Sbml) + @ParameterType(name= EnumParameterTypes.InputFile) + @Option(name = "-s", usage = "input SBML file", required = true) + public String inputPath = null; + + @ParameterType(name= EnumParameterTypes.InputFile) + @Format(name= EnumFormats.Txt) + @Option(name = "-sc", usage = "input Side compound file (recommended)", required = false) + public String inputSide = null; + + @ParameterType(name= EnumParameterTypes.OutputFile) + @Format(name= EnumFormats.Gml) + @Option(name = "-o", usage = "output Graph file", required = true) + public String outputPath = null; + + enum strategy {no, by_name,by_id} + @Option(name = "-mc", aliases = {"--mergecomp"}, usage = "merge compartments. " + + "Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form \"xxx_y\" with xxx as base identifier and y as compartment label).") + public strategy mergingStrat = strategy.no; + public String idRegex = "^(\\w+)_\\w$"; + + + @Option(name = "-ri", aliases = {"--removeIsolatedNodes"}, usage = "remove isolated nodes", required = false) + public boolean removeIsolated = false; + + @ParameterType(name=EnumParameterTypes.InputFile) + @Format(name=EnumFormats.Tsv) + @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs", forbids = {"-dw"}) + public String weightFile = null; + + @Option(name = "-un", aliases = {"--undirected"}, usage = "create as undirected", required = false) + public boolean undirected = false; + + + @Format(name= EnumFormats.Txt) + @ParameterType(name= EnumParameterTypes.InputFile) + @Option(name = "-noi", usage = "input Node of interest file", required = true) + public String dataPath = null; + + @Option(name = "-out", aliases = {"--outDegree"}, usage = "use out-degree as scoring function instead of betweenness", required = false) + public boolean out = false; + + + public static void main(String[] args) { + + TopologicalPathwayAnalysis app = new TopologicalPathwayAnalysis(); + + app.parseArguments(args); + + app.run(); + + } + + + public void run() { + //open file + FileWriter fw = null; + try { + fw = new FileWriter(outputPath); + } catch (IOException e) { + System.err.println("Error while opening the output file"); + System.err.println(e.getMessage()); + System.exit(1); + } + + System.out.print("Reading SBML..."); + JsbmlReader reader = new JsbmlReader(this.inputPath); + ArrayList<PackageParser> pkgs = new ArrayList<>(Arrays.asList( + new NotesParser(false), new FBCParser(), new GroupPathwayParser())); + + BioNetwork network = null; + + try { + network = reader.read(pkgs); + } catch (Met4jSbmlReaderException e) { + System.err.println("Error while reading the SBML file"); + System.err.println(e.getMessage()); + System.exit(1); + } + System.out.println(" Done.\n\n"); + + + System.out.print("Buildinig Network..."); + Bionetwork2BioGraph builder = new Bionetwork2BioGraph(network); + CompoundGraph graph = builder.getCompoundGraph(); + + //Graph processing: side compound removal [optional] + if (inputSide != null) { + System.err.println("removing side compounds..."); + NodeMapping<BioMetabolite, ReactionEdge, CompoundGraph> mapper = new NodeMapping<>(graph).skipIfNotFound(); + BioCollection<BioMetabolite> sideCpds = null; + try { + sideCpds = mapper.map(inputSide); + } catch (IOException e) { + System.err.println("Error while reading the side compound file"); + System.err.println(e.getMessage()); + System.exit(1); + } + boolean removed = graph.removeAllVertices(sideCpds); + if (removed) System.err.println(sideCpds.size() + " compounds removed."); + } + + //Graph processing: set weights [optional] + WeightingPolicy<BioMetabolite, ReactionEdge, CompoundGraph> wp = new UnweightedPolicy<>(); + if (weightFile != null) { + System.err.println("Setting edge weights..."); + wp = new WeightsFromFile(weightFile); + wp.setWeight(graph); + System.out.println(" Done."); + }else{ + wp.setWeight(graph); + } + + //invert graph as undirected (copy edge weight to reversed edge) + if(undirected){ + System.out.print("Create Undirected..."); + graph.asUndirected(); + System.out.println(" Done."); + } + + //merge compartment + if(mergingStrat!=strategy.no){ + System.out.print("Merging compartments..."); + VertexContraction vc = new VertexContraction(); + VertexContraction.Mapper merger = mergingStrat.equals(strategy.by_name) ? new VertexContraction.MapByName() : new VertexContraction.MapByIdSubString(idRegex); + graph = vc.decompartmentalize(graph, merger); + System.out.println(" Done."); + } + + //remove isolated nodes + if(removeIsolated){ + System.out.println("Remove isolated nodes..."); + HashSet<BioMetabolite> nodes = new HashSet<>(graph.vertexSet()); + graph.removeIsolatedNodes(); + nodes.removeAll(graph.vertexSet()); + for(BioMetabolite n : nodes){ + System.out.println("\tremoving " + n.getName()); + } + System.out.println(" Done."); + } + + System.out.println(" Network successfully created.\n\n"); + + System.out.println("Importing nodes of interest"); + NodeMapping<BioMetabolite, ReactionEdge, CompoundGraph> mapper = new NodeMapping<>(graph).throwErrorIfNotFound(); + HashSet<BioMetabolite> data = null; + try { + data = new HashSet<>(mapper.map(dataPath)); + } catch (IOException e) { + System.err.println("Error while reading the source metabolite file"); + System.err.println(e.getMessage()); + System.exit(1); + } + System.out.println("Computing Pathway topology Analysis"); + + + System.out.println("Computing Pathway topology Analysis"); + PathwayTopologyAnalysis computor = new PathwayTopologyAnalysis(network,graph,data).useNormalization(); + PathwayTopologyAnalysis.IndividualScoringStrategy strat = out ? PathwayTopologyAnalysis.IndividualScoringStrategy.outDegree() : PathwayTopologyAnalysis.IndividualScoringStrategy.betweenness() ; + Map<BioPathway, Double> res = computor.run(strat,PathwayTopologyAnalysis.AggregationStrategy.rawSum()); + System.out.println("Computing Pathway topology Analysis"); + + //export results + System.out.print("Exporting..."); + try { + for (Map.Entry<BioPathway, Double> e : res.entrySet()) { + BioPathway p = e.getKey(); + fw.write(p.getId() + "\t" + p.getName() + "\t" + e.getValue() + "\n"); + } + fw.close(); + } catch (IOException e) { + System.err.println("Error while writing the result file"); + System.err.println(e.getMessage()); + System.exit(1); + } + System.err.println("Done."); + return; + } + + @Override + public String getLabel() {return this.getClass().getSimpleName();} + + @Override + public String getLongDescription() { + return ""; + } + + @Override + public String getShortDescription() {return "";} +} + -- GitLab From 68743f650ab13333e60db3ef2bc9743e5e7f7fad Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Fri, 7 Jul 2023 16:55:17 +0200 Subject: [PATCH 10/20] improve doc, add description --- .../TopologicalPathwayAnalysis.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java index c5034a7b3..89430fb54 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -46,7 +46,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { @ParameterType(name= EnumParameterTypes.OutputFile) @Format(name= EnumFormats.Gml) - @Option(name = "-o", usage = "output Graph file", required = true) + @Option(name = "-o", usage = "output result file", required = true) public String outputPath = null; enum strategy {no, by_name,by_id} @@ -61,7 +61,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { @ParameterType(name=EnumParameterTypes.InputFile) @Format(name=EnumFormats.Tsv) - @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs", forbids = {"-dw"}) + @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs. Edges not found in file will be removed", forbids = {"-dw"}) public String weightFile = null; @Option(name = "-un", aliases = {"--undirected"}, usage = "create as undirected", required = false) @@ -140,7 +140,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { WeightingPolicy<BioMetabolite, ReactionEdge, CompoundGraph> wp = new UnweightedPolicy<>(); if (weightFile != null) { System.err.println("Setting edge weights..."); - wp = new WeightsFromFile(weightFile); + wp = new WeightsFromFile(weightFile).removeEdgeNotInFile(); wp.setWeight(graph); System.out.println(" Done."); }else{ @@ -218,10 +218,14 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { @Override public String getLongDescription() { - return ""; + return "Run a Topological Pathway Analysis (TPA) to identify key pathways based on topological properties of its mapped compounds." + + " From a list of compounds of interest, the app compute their betweenness centrality (which quantifies how often a compound acts as a intermediary along the shortest paths between pairs of other compounds in the network," + + " which, if high, suggest a critical role in the overall flow within the network). Each pathway is scored according to the summed centrality of its metabolites found in the dataset." + + " Alternatively to the betweenness, one can make use of the out-degree (the number of outgoing link, i.e. number of direct metabolic product) as a criterion of importance." + + " TPA is complementary to statistical enrichment analysis to ensures a more meaningful interpretation of the data, by taking into account the influence of identified compounds on the structure of the pathways."; } @Override - public String getShortDescription() {return "";} + public String getShortDescription() {return "Run a Topological Pathway Analysis to identify key pathways based on topological properties of its constituting compounds.";} } -- GitLab From 8214b69b0e72f34166233050f5d5b6ab7a99d8b7 Mon Sep 17 00:00:00 2001 From: Ludovic Cottret <ludovic.cottret@inra.fr> Date: Tue, 11 Jul 2023 11:46:53 +0200 Subject: [PATCH 11/20] change arguments doc --- .../networkAnalysis/TopologicalPathwayAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java index 89430fb54..a2a3a6918 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -46,7 +46,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { @ParameterType(name= EnumParameterTypes.OutputFile) @Format(name= EnumFormats.Gml) - @Option(name = "-o", usage = "output result file", required = true) + @Option(name = "-o", usage = "output result file (Gml format)", required = true) public String outputPath = null; enum strategy {no, by_name,by_id} -- GitLab From f7ed13c8a3e8916973a46b478c0cbe410274951c Mon Sep 17 00:00:00 2001 From: Ludovic Cottret <ludovic.cottret@inra.fr> Date: Tue, 11 Jul 2023 11:47:16 +0200 Subject: [PATCH 12/20] improve arguments doc --- .../networkAnalysis/TopologicalPathwayAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java index a2a3a6918..93f0c473e 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -64,7 +64,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs. Edges not found in file will be removed", forbids = {"-dw"}) public String weightFile = null; - @Option(name = "-un", aliases = {"--undirected"}, usage = "create as undirected", required = false) + @Option(name = "-un", aliases = {"--undirected"}, usage = "the compound graph built from the metabolic network and used for computations will undirected, i.e. the reaction directions won't be taken into account", required = false) public boolean undirected = false; -- GitLab From c4e2dc39cd0efa598397d1eff755808ab6b73771 Mon Sep 17 00:00:00 2001 From: Ludovic Cottret <ludovic.cottret@inra.fr> Date: Tue, 11 Jul 2023 11:47:32 +0200 Subject: [PATCH 13/20] improve arguments doc --- .../networkAnalysis/TopologicalPathwayAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java index 93f0c473e..b4035ee5c 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -70,7 +70,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { @Format(name= EnumFormats.Txt) @ParameterType(name= EnumParameterTypes.InputFile) - @Option(name = "-noi", usage = "input Node of interest file", required = true) + @Option(name = "-noi", usage = "file containing the list of metabolites of interests (one per line)", required = true) public String dataPath = null; @Option(name = "-out", aliases = {"--outDegree"}, usage = "use out-degree as scoring function instead of betweenness", required = false) -- GitLab From 8e7272374c4f9bad92f0eede08dcd83b52fa5a25 Mon Sep 17 00:00:00 2001 From: Ludovic Cottret <ludovic.cottret@inra.fr> Date: Tue, 11 Jul 2023 11:47:46 +0200 Subject: [PATCH 14/20] fix typo --- .../networkAnalysis/TopologicalPathwayAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java index b4035ee5c..368a4f9b2 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -116,7 +116,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { System.out.println(" Done.\n\n"); - System.out.print("Buildinig Network..."); + System.out.print("Building Network..."); Bionetwork2BioGraph builder = new Bionetwork2BioGraph(network); CompoundGraph graph = builder.getCompoundGraph(); -- GitLab From c7799c11823f36c5fc84e52128097d1235992699 Mon Sep 17 00:00:00 2001 From: Ludovic Cottret <ludovic.cottret@inra.fr> Date: Tue, 11 Jul 2023 11:47:54 +0200 Subject: [PATCH 15/20] improve arguments doc --- .../networkAnalysis/TopologicalPathwayAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java index 368a4f9b2..0def9f53c 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -122,7 +122,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { //Graph processing: side compound removal [optional] if (inputSide != null) { - System.err.println("removing side compounds..."); + System.out.println("Removing side compounds..."); NodeMapping<BioMetabolite, ReactionEdge, CompoundGraph> mapper = new NodeMapping<>(graph).skipIfNotFound(); BioCollection<BioMetabolite> sideCpds = null; try { -- GitLab From 63bedb26c4d8d7bfa49eb0754b52537642671a12 Mon Sep 17 00:00:00 2001 From: Ludovic Cottret <ludovic.cottret@inra.fr> Date: Tue, 11 Jul 2023 11:48:17 +0200 Subject: [PATCH 16/20] change console output --- .../networkAnalysis/TopologicalPathwayAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java index 0def9f53c..378db4858 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -133,7 +133,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { System.exit(1); } boolean removed = graph.removeAllVertices(sideCpds); - if (removed) System.err.println(sideCpds.size() + " compounds removed."); + if (removed) System.out.println(sideCpds.size() + " compounds removed."); } //Graph processing: set weights [optional] -- GitLab From ac159ba17f36461100c89c3d7f0b4dedc6521952 Mon Sep 17 00:00:00 2001 From: Ludovic Cottret <ludovic.cottret@inra.fr> Date: Tue, 11 Jul 2023 11:48:21 +0200 Subject: [PATCH 17/20] change console output --- .../networkAnalysis/TopologicalPathwayAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java index 378db4858..0374934bd 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -139,7 +139,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { //Graph processing: set weights [optional] WeightingPolicy<BioMetabolite, ReactionEdge, CompoundGraph> wp = new UnweightedPolicy<>(); if (weightFile != null) { - System.err.println("Setting edge weights..."); + System.out.println("Setting edge weights..."); wp = new WeightsFromFile(weightFile).removeEdgeNotInFile(); wp.setWeight(graph); System.out.println(" Done."); -- GitLab From 44ea567cbb8ecc6af88228c5b1e10186159f31bf Mon Sep 17 00:00:00 2001 From: Ludovic Cottret <ludovic.cottret@inra.fr> Date: Tue, 11 Jul 2023 11:48:25 +0200 Subject: [PATCH 18/20] change console output --- .../networkAnalysis/TopologicalPathwayAnalysis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java index 0374934bd..60a44ee16 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -209,7 +209,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { System.err.println(e.getMessage()); System.exit(1); } - System.err.println("Done."); + System.out.println("Done."); return; } -- GitLab From 63f4b97251684a35bc4a744dabad33a625fe7739 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Tue, 11 Jul 2023 18:45:14 +0200 Subject: [PATCH 19/20] fix error for normalization with compound in kb not in graph + filter non mapped pathways --- .../computation/analyze/PathwayTopologyAnalysis.java | 6 ++++-- .../networkAnalysis/TopologicalPathwayAnalysis.java | 12 ++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java index 15455dc73..9dbbf0aaf 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java @@ -70,8 +70,8 @@ public class PathwayTopologyAnalysis { public Map<BioPathway,Double> run(IndividualScoringStrategy scoring, AggregationStrategy aggregation){ Map<BioPathway,Double> pathwayFinalScore = computePathwayScore(data, g, scoring, aggregation); if(normalize){ - //create background data (i.e dataset with all compounds) - Set<BioMetabolite> background = kb.values().stream().flatMap(Collection::stream) + //create background data (i.e dataset with all compounds in network) + Set<BioMetabolite> background = kb.values().stream().filter(v -> g.vertexSet().contains(v)).flatMap(Collection::stream) .collect(Collectors.toSet()); //compute pathway score as if all their compounds were in dataset (recompute individual score for all compounds) @@ -91,6 +91,8 @@ public class PathwayTopologyAnalysis { Compute topology pathway analysis */ private Map<BioPathway,Double> computePathwayScore(Set<BioMetabolite> data, CompoundGraph g, IndividualScoringStrategy scoring, AggregationStrategy aggregation){ + //filter kb to keep only mapped pathways + kb = kb.entrySet().stream().filter(e -> e.getValue().stream().anyMatch(data::contains)).collect(Collectors.toMap(e->e.getKey(),e->e.getValue())); //From compounds and given interaction network, compute topology score for each compound, using scoring strategy. Map<BioMetabolite,Double> individualScore = scoring.apply(data,g); //From knowledge base, get the pathway memberships and collect component's scores. diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java index 60a44ee16..5b2df5b72 100644 --- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java @@ -61,7 +61,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { @ParameterType(name=EnumParameterTypes.InputFile) @Format(name=EnumFormats.Tsv) - @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs. Edges not found in file will be removed", forbids = {"-dw"}) + @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs, taken into account for betweenness computation. Edges not found in file will be removed", forbids = {"-dw"}) public String weightFile = null; @Option(name = "-un", aliases = {"--undirected"}, usage = "the compound graph built from the metabolic network and used for computations will undirected, i.e. the reaction directions won't be taken into account", required = false) @@ -73,7 +73,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { @Option(name = "-noi", usage = "file containing the list of metabolites of interests (one per line)", required = true) public String dataPath = null; - @Option(name = "-out", aliases = {"--outDegree"}, usage = "use out-degree as scoring function instead of betweenness", required = false) + @Option(name = "-out", aliases = {"--outDegree"}, usage = "use out-degree as scoring function instead of betweenness (faster computation)", required = false) public boolean out = false; @@ -102,7 +102,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { System.out.print("Reading SBML..."); JsbmlReader reader = new JsbmlReader(this.inputPath); ArrayList<PackageParser> pkgs = new ArrayList<>(Arrays.asList( - new NotesParser(false), new FBCParser(), new GroupPathwayParser())); + new NotesParser(false), new GroupPathwayParser())); BioNetwork network = null; @@ -187,14 +187,14 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication { System.err.println(e.getMessage()); System.exit(1); } - System.out.println("Computing Pathway topology Analysis"); + System.out.println("Done."); - System.out.println("Computing Pathway topology Analysis"); + System.out.println("Computing Pathway topology Analysis... (may take a while)"); PathwayTopologyAnalysis computor = new PathwayTopologyAnalysis(network,graph,data).useNormalization(); PathwayTopologyAnalysis.IndividualScoringStrategy strat = out ? PathwayTopologyAnalysis.IndividualScoringStrategy.outDegree() : PathwayTopologyAnalysis.IndividualScoringStrategy.betweenness() ; Map<BioPathway, Double> res = computor.run(strat,PathwayTopologyAnalysis.AggregationStrategy.rawSum()); - System.out.println("Computing Pathway topology Analysis"); + System.out.println("Done."); //export results System.out.print("Exporting..."); -- GitLab From eae8a61bd77111df3686540ae890dddbd0882a48 Mon Sep 17 00:00:00 2001 From: cfrainay <clement.frainay@inrae.fr> Date: Wed, 12 Jul 2023 16:12:56 +0200 Subject: [PATCH 20/20] fix normalization, update test --- .../computation/analyze/PathwayTopologyAnalysis.java | 3 +-- .../metexplore/met4j_graph/TestPathwayTopologyAnalysis.java | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java index 9dbbf0aaf..8f910ae6f 100644 --- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java +++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java @@ -71,12 +71,11 @@ public class PathwayTopologyAnalysis { Map<BioPathway,Double> pathwayFinalScore = computePathwayScore(data, g, scoring, aggregation); if(normalize){ //create background data (i.e dataset with all compounds in network) - Set<BioMetabolite> background = kb.values().stream().filter(v -> g.vertexSet().contains(v)).flatMap(Collection::stream) + Set<BioMetabolite> background = kb.values().stream().flatMap(Collection::stream).filter(v -> g.vertexSet().contains(v)) .collect(Collectors.toSet()); //compute pathway score as if all their compounds were in dataset (recompute individual score for all compounds) Map<BioPathway,Double> byPathwayBackgroundScore = computePathwayScore(background, g, scoring, aggregation); - //set final pathway score as ratio between pathway score computed from data and theoretical maximal pathway score pathwayFinalScore = Stream.concat(pathwayFinalScore.entrySet().stream(), byPathwayBackgroundScore.entrySet().stream()) .collect(Collectors.toMap( diff --git a/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java index c84d30850..1fed61424 100644 --- a/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java +++ b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java @@ -16,6 +16,7 @@ import java.util.HashSet; import java.util.Map; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; public class TestPathwayTopologyAnalysis { @@ -182,7 +183,7 @@ public class TestPathwayTopologyAnalysis { assertEquals(toyBetweenness.get(a)+toyBetweenness.get(b), res.get(x), 0.00000001); assertEquals(toyBetweenness.get(b), res.get(y), 0.00000001); - assertEquals(0, res.get(z), 0.00000001); + assertFalse(res.containsKey(z)); } @Test -- GitLab