From 372b2e5cc60b37622cff7b942625a749a189ea68 Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Fri, 3 Feb 2023 17:28:36 +0100
Subject: [PATCH 01/20] [graph] Stub: create class for pathway topology
 analysis

---
 .../analyze/PathwayTopologyAnalaysis.java     | 130 ++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java

diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java
new file mode 100644
index 000000000..0346d0197
--- /dev/null
+++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java
@@ -0,0 +1,130 @@
+package fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze;
+
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite;
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork;
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioPathway;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality.PathBasedCentrality;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge;
+
+import java.util.*;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * Class to aggregate bioentity centrality into pathway score for Pathway Topology Analysis
+ */
+public class PathwayTopologyAnalaysis {
+
+    Map<BioPathway,Collection<BioMetabolite>> kb;
+    CompoundGraph g;
+    Map<BioMetabolite,Double> data;
+
+    public PathwayTopologyAnalaysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){
+        this.kb=knowledgeBase;
+        this.g = topology;
+        this.data=compoundData;
+    }
+
+    public PathwayTopologyAnalaysis(BioNetwork knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){
+        this.kb=new HashMap<>();
+        for(BioPathway p : knowledgeBase.getPathwaysView()){
+            kb.put(p,knowledgeBase.getMetabolitesFromPathway(p));
+        }
+        this.g = topology;
+        this.data=compoundData;
+    }
+
+    public PathwayTopologyAnalaysis(BioNetwork knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){
+        this.kb=new HashMap<>();
+        for(BioPathway p : knowledgeBase.getPathwaysView()){
+            kb.put(p,knowledgeBase.getMetabolitesFromPathway(p));
+        }
+        this.g = topology;
+        Double defaultValue = 1.0/compoundOfInterest.size();
+        this.data=compoundOfInterest.stream().
+                collect(Collectors.toMap(c -> c, c -> defaultValue));
+    }
+    public PathwayTopologyAnalaysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){
+        this.kb=knowledgeBase;
+        this.g = topology;
+        Double defaultValue = 1.0/compoundOfInterest.size();
+        this.data=compoundOfInterest.stream().
+                collect(Collectors.toMap(c -> c, c -> defaultValue));
+    }
+
+    public Map<BioPathway,Double> runAnalysis(IndividualScoringStrategy scoring, AggregationStrategy aggregation){
+        //From input data and given interaction network, compute topology score for each compound, using scoring strategy.
+        Map<BioMetabolite,Double> individualScore = scoring.apply(data,g);
+        //From knowledge base, get the pathway memberships and collect component's scores.
+        Map<BioPathway,Collection<Double>> pathwayScores = individualScoresByPathway(individualScore);
+        //Using aggregation strategy, compute for each pathway its final score from its constituents ones.
+        Map<BioPathway,Double> pathwayFinalScore = aggregation.apply(pathwayScores);
+        return pathwayFinalScore;
+    }
+
+    private HashMap<BioPathway,Collection<Double>> individualScoresByPathway(Map<BioMetabolite,Double> individualScore){
+        HashMap<BioPathway,Collection<Double>> pathwayScores = new HashMap<>();
+        for(Map.Entry<BioPathway,Collection<BioMetabolite>> pathwayEntry : kb.entrySet()){
+            List<Double> componentsScore = pathwayEntry.getValue().stream()
+                    .filter(individualScore::containsKey)
+                    .map(individualScore::get)
+                    .collect(Collectors.toList());
+            pathwayScores.put(pathwayEntry.getKey(),componentsScore);
+        }
+        return pathwayScores;
+    }
+
+    public interface IndividualScoringStrategy extends BiFunction<Map<BioMetabolite,Double>,CompoundGraph,Map<BioMetabolite,Double>> {
+
+        static IndividualScoringStrategy betweenness(){
+            return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> {
+            PathBasedCentrality<BioMetabolite, ReactionEdge,CompoundGraph> centralityAnalyser = new PathBasedCentrality<>(graph);
+            Map<BioMetabolite, Integer> betweenness = centralityAnalyser.getGeodesicBetweenness();
+            return betweenness.entrySet().stream()
+                    .filter(e -> data.containsKey(e.getKey()))
+                    .collect(Collectors.toMap(e -> e.getKey(), e -> Double.valueOf(data.get(e.getKey()))));
+            };
+        }
+
+        static IndividualScoringStrategy outDegree(){
+            return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream()
+                    .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.outDegreeOf(v))));
+        }
+
+        static IndividualScoringStrategy neighbors(){
+            return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream()
+                    .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.neighborListOf(v).size())));
+        }
+
+    }
+
+    public interface AggregationStrategy extends Function<Map<BioPathway,Collection<Double>>,Map<BioPathway,Double>> {
+        static AggregationStrategy rawSum(){
+            return (Map<BioPathway,Collection<Double>> pathwayScores) ->
+            {
+                Map<BioPathway,Double> pathwayFinalScore = new HashMap<>();
+                for(Map.Entry<BioPathway,Collection<Double>> e : pathwayScores.entrySet()){
+                    Double finalScoring = 0.0;
+                    for(Double score : e.getValue()){
+                        finalScoring+=score;
+                    }
+                    pathwayFinalScore.put(e.getKey(),finalScoring);
+                }
+                return pathwayFinalScore;
+            };
+        }
+
+
+        static AggregationStrategy normalizedSum(){
+            return (Map<BioPathway,Collection<Double>> pathwayScores) ->
+            {
+                Map<BioPathway,Double> pathwayRawScore = rawSum().apply(pathwayScores);
+                Double sum = pathwayRawScore.values().stream().reduce(0.0, (x,y) -> x+y);
+                return pathwayRawScore.entrySet().stream()
+                        .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()/sum));
+            };
+        }
+    }
+}
-- 
GitLab


From cc50281e255d9602bab92145aec25683e35c1855 Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Tue, 7 Feb 2023 14:56:17 +0100
Subject: [PATCH 02/20] add javadoc, fix class name typo (PathwayTopologyAnalysis) and make fields private

---
 ...ysis.java => PathwayTopologyAnalysis.java} | 79 ++++++++++++++++---
 1 file changed, 68 insertions(+), 11 deletions(-)
 rename met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/{PathwayTopologyAnalaysis.java => PathwayTopologyAnalysis.java} (59%)

diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
similarity index 59%
rename from met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java
rename to met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
index 0346d0197..6942c5364 100644
--- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalaysis.java
+++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
@@ -13,21 +13,33 @@ import java.util.function.Function;
 import java.util.stream.Collectors;
 
 /**
- * Class to aggregate bioentity centrality into pathway score for Pathway Topology Analysis
+ * Class to aggregate metabolites' centrality into pathway score for Pathway Topology Analysis
  */
-public class PathwayTopologyAnalaysis {
-
-    Map<BioPathway,Collection<BioMetabolite>> kb;
-    CompoundGraph g;
-    Map<BioMetabolite,Double> data;
-
-    public PathwayTopologyAnalaysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){
+public class PathwayTopologyAnalysis {
+
+    private Map<BioPathway,Collection<BioMetabolite>> kb;
+    private CompoundGraph g;
+    private Map<BioMetabolite,Double> data;
+
+    /**
+     * Instantiate class to perform Pathway Topology Analysis
+     * @param knowledgeBase Map affiliating metabolites to pathways
+     * @param topology CompoundGraph storing metabolites' relationships
+     * @param compoundData Map containing input data (metabolites abundance for example)
+     */
+    public PathwayTopologyAnalysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){
         this.kb=knowledgeBase;
         this.g = topology;
         this.data=compoundData;
     }
 
-    public PathwayTopologyAnalaysis(BioNetwork knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){
+    /**
+     * Instantiate class to perform Pathway Topology Analysis
+     * @param knowledgeBase BioNetwork affiliating metabolites to pathways
+     * @param topology CompoundGraph storing metabolites' relationships
+     * @param compoundData Map containing input data (metabolites abundance for example)
+     */
+    public PathwayTopologyAnalysis(BioNetwork knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){
         this.kb=new HashMap<>();
         for(BioPathway p : knowledgeBase.getPathwaysView()){
             kb.put(p,knowledgeBase.getMetabolitesFromPathway(p));
@@ -36,7 +48,13 @@ public class PathwayTopologyAnalaysis {
         this.data=compoundData;
     }
 
-    public PathwayTopologyAnalaysis(BioNetwork knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){
+    /**
+     * Instantiate class to perform Pathway Topology Analysis
+     * @param knowledgeBase BioNetwork affiliating metabolites to pathways
+     * @param topology CompoundGraph storing metabolites' relationships
+     * @param compoundOfInterest Set containing input data (significantly overrepresented metabolites for example)
+     */
+    public PathwayTopologyAnalysis(BioNetwork knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){
         this.kb=new HashMap<>();
         for(BioPathway p : knowledgeBase.getPathwaysView()){
             kb.put(p,knowledgeBase.getMetabolitesFromPathway(p));
@@ -46,7 +64,13 @@ public class PathwayTopologyAnalaysis {
         this.data=compoundOfInterest.stream().
                 collect(Collectors.toMap(c -> c, c -> defaultValue));
     }
-    public PathwayTopologyAnalaysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){
+    /**
+     * Instantiate class to perform Pathway Topology Analysis
+     * @param knowledgeBase Map affiliating metabolites to pathways
+     * @param topology CompoundGraph storing metabolites' relationships
+     * @param compoundOfInterest Set containing input data (significantly overrepresented metabolites for example)
+     */
+    public PathwayTopologyAnalysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){
         this.kb=knowledgeBase;
         this.g = topology;
         Double defaultValue = 1.0/compoundOfInterest.size();
@@ -54,6 +78,12 @@ public class PathwayTopologyAnalaysis {
                 collect(Collectors.toMap(c -> c, c -> defaultValue));
     }
 
+    /**
+     * Method to compute Pathway Impact from data, according to a given strategy for individual scoring and aggregation
+     * @param scoring an IndividualScoringStrategy for scoring metabolites (centrality measure)
+     * @param aggregation an AggregationStrategy for aggregating component scores into pathway score.
+     * @return a Map of Pathways and their score
+     */
     public Map<BioPathway,Double> runAnalysis(IndividualScoringStrategy scoring, AggregationStrategy aggregation){
         //From input data and given interaction network, compute topology score for each compound, using scoring strategy.
         Map<BioMetabolite,Double> individualScore = scoring.apply(data,g);
@@ -64,6 +94,9 @@ public class PathwayTopologyAnalaysis {
         return pathwayFinalScore;
     }
 
+    /*
+    From associated compound given by knowledge base, retrieve list of components' scores for each pathway
+     */
     private HashMap<BioPathway,Collection<Double>> individualScoresByPathway(Map<BioMetabolite,Double> individualScore){
         HashMap<BioPathway,Collection<Double>> pathwayScores = new HashMap<>();
         for(Map.Entry<BioPathway,Collection<BioMetabolite>> pathwayEntry : kb.entrySet()){
@@ -76,8 +109,15 @@ public class PathwayTopologyAnalaysis {
         return pathwayScores;
     }
 
+    /**
+     * Interface for individual scoring strategy, computing metabolites impact
+     */
     public interface IndividualScoringStrategy extends BiFunction<Map<BioMetabolite,Double>,CompoundGraph,Map<BioMetabolite,Double>> {
 
+        /**
+         * Use betweenness as scoring function, i.e. the number of shortest paths passing through a given node (excluding paths where it is the starting or ending node).
+         * @return a map of compounds and their respective impact score.
+         */
         static IndividualScoringStrategy betweenness(){
             return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> {
             PathBasedCentrality<BioMetabolite, ReactionEdge,CompoundGraph> centralityAnalyser = new PathBasedCentrality<>(graph);
@@ -88,11 +128,20 @@ public class PathwayTopologyAnalaysis {
             };
         }
 
+        /**
+         * Use out degree as scoring function, i.e. the number of outgoing edges of a node.
+         * @return a map of compounds and their respective impact score.
+         */
         static IndividualScoringStrategy outDegree(){
             return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream()
                     .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.outDegreeOf(v))));
         }
 
+        /**
+         * Use number of neighbors as scoring function. Contrary to degree, this is not impacted by parallel edges
+         * (same pairs of nodes connected by different edges corresponding to different reactions)
+         * @return a map of compounds and their respective impact score.
+         */
         static IndividualScoringStrategy neighbors(){
             return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream()
                     .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.neighborListOf(v).size())));
@@ -100,7 +149,15 @@ public class PathwayTopologyAnalaysis {
 
     }
 
+    /**
+     * Interface for aggregation strategy, computing pathway impact from constituting compounds' impact
+     */
     public interface AggregationStrategy extends Function<Map<BioPathway,Collection<Double>>,Map<BioPathway,Double>> {
+
+        /**
+         * Simply count the sum of compounds of interest scores as the final pathway score
+         * @return
+         */
         static AggregationStrategy rawSum(){
             return (Map<BioPathway,Collection<Double>> pathwayScores) ->
             {
-- 
GitLab


From 5c1f9fbaa4513f51ea3710f322b8a4b0d48a59be Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Tue, 7 Feb 2023 17:32:00 +0100
Subject: [PATCH 03/20] add PageRank as individual scoring strategy.

Also simplified the PageRank computation in the EigenVectorCentrality class.
---
 .../analyze/PathwayTopologyAnalysis.java      | 29 +++++++++++++++++-
 .../centrality/EigenVectorCentrality.java     | 30 +++++++++++--------
 2 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
index 6942c5364..bcc67669f 100644
--- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
+++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
@@ -3,6 +3,7 @@ package fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze;
 import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite;
 import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork;
 import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioPathway;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality.EigenVectorCentrality;
 import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality.PathBasedCentrality;
 import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph;
 import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge;
@@ -115,7 +116,7 @@ public class PathwayTopologyAnalysis {
     public interface IndividualScoringStrategy extends BiFunction<Map<BioMetabolite,Double>,CompoundGraph,Map<BioMetabolite,Double>> {
 
         /**
-         * Use betweenness as scoring function, i.e. the number of shortest paths passing through a given node (excluding paths where it is the starting or ending node).
+         * Use betweenness as scoring function, i.e. the proportion of shortest paths passing through a given node (excluding paths where it is the starting or ending node).
          * @return a map of compounds and their respective impact score.
          */
         static IndividualScoringStrategy betweenness(){
@@ -128,6 +129,32 @@ public class PathwayTopologyAnalysis {
             };
         }
 
+        /**
+         * Use PageRank as scoring function, a centrality measure that represents the likelihood that a random walk reach a particular node.
+         * This is run with default damping factor 0.85, using power iteration approximation with 15000 max iterations and 0.001 tolerance for convergence
+         * @return a map of compounds and their respective impact score.
+         */
+        static IndividualScoringStrategy pageRank(){
+            return IndividualScoringStrategy.pageRank(0.85,15000,0.001);
+        }
+
+        /**
+         * Use PageRank as scoring function, a centrality measure that represents the likelihood that a random walk reach a particular node
+         * @param dampingFactor damping factor
+         * @param maxIter maximal number of iteration of the power method
+         * @param tolerance convergence tolerance
+         * @return a map of compounds and their respective impact score.
+         */
+        static IndividualScoringStrategy pageRank(Double dampingFactor, int maxIter, double tolerance){
+            return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> {
+                EigenVectorCentrality<BioMetabolite, ReactionEdge,CompoundGraph> centralityAnalyser = new EigenVectorCentrality<>(graph);
+                centralityAnalyser.addJumpProb(graph.vertexSet().stream()
+                        .map(BioMetabolite::getId).collect(Collectors.toSet()), dampingFactor);
+                Map<String, Double> pageRank = centralityAnalyser.computePowerMethodPageRank(dampingFactor,maxIter,tolerance);
+                return pageRank.entrySet().stream().collect(Collectors.toMap(e -> graph.getVertex(e.getKey()), e -> e.getValue()));
+            };
+        }
+
         /**
          * Use out degree as scoring function, i.e. the number of outgoing edges of a node.
          * @return a map of compounds and their respective impact score.
diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/centrality/EigenVectorCentrality.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/centrality/EigenVectorCentrality.java
index d3f0a32c5..d1893e22c 100644
--- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/centrality/EigenVectorCentrality.java
+++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/centrality/EigenVectorCentrality.java
@@ -35,19 +35,15 @@
  */
 package fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioEntity;
 import fr.inrae.toulouse.metexplore.met4j_graph.computation.utils.ComputeAdjacencyMatrix;
 import fr.inrae.toulouse.metexplore.met4j_graph.core.BioGraph;
 import fr.inrae.toulouse.metexplore.met4j_graph.core.Edge;
 import fr.inrae.toulouse.metexplore.met4j_mathUtils.matrix.BioMatrix;
 import fr.inrae.toulouse.metexplore.met4j_mathUtils.matrix.EjmlMatrix;
-import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioEntity;
+
+import java.util.*;
+import java.util.stream.Collectors;
 
 /**
  * Class to compute the eigen vector centrality of each vertex in a BioGraph.
@@ -134,17 +130,27 @@ public class EigenVectorCentrality<V extends BioEntity, E extends Edge<V>, G ext
 	/**
 	 * Gets a map with all nodes id as key and global page rank
 	 *
-	 * @param d the damping factor
+	 * @param dampingFactor the damping factor
 	 * @return the map with node identifier and corresponding centrality
 	 */
-	public HashMap<String, Double> computePageRank(double d){
+	public HashMap<String, Double> computePageRank(double dampingFactor){
 		BioMatrix tmp = adjacencyMatrix.copy();
-		addJumpProb(adjacencyMatrix.getRowLabelMap().keySet(),d);
+		addJumpProb(adjacencyMatrix.getRowLabelMap().keySet(),1-dampingFactor);
 		HashMap<String, Double> result = computeEigenVectorCentrality();
 		adjacencyMatrix = tmp;
 		return result;
 	}
 
+	public HashMap<String, Double> computePowerMethodPageRank(double dampingFactor, int maxNbOfIter, double tolerance){
+		BioMatrix tmp = adjacencyMatrix.copy();
+		Set<String> allNodes = adjacencyMatrix.getRowLabelMap().keySet();
+		addJumpProb(allNodes,1 - dampingFactor);
+		Map<String, Double> seeds = allNodes.stream().collect(Collectors.toMap(k -> k, k -> 1.0 / allNodes.size()));
+
+		HashMap<String, Double> result = powerIteration(seeds, maxNbOfIter, tolerance);
+		adjacencyMatrix = tmp;
+		return result;
+	}
 
 	/**
 	 * add a constant probability to "jump" (i.e. go to another node without necessarily following an edge) to defined set of node
@@ -329,7 +335,7 @@ public class EigenVectorCentrality<V extends BioEntity, E extends Edge<V>, G ext
 	 * @param tol the tolerance, if the max delta between two iteration is below this value, the result is returned
 	 * @return map with node id as key and eigen vector centrality as value
 	 */
-	public HashMap<String, Double> powerIteration(HashMap<String, Double> seeds, int maxIter, double tol){
+	public HashMap<String, Double> powerIteration(Map<String, Double> seeds, int maxIter, double tol){
 		BioMatrix rank = new EjmlMatrix(1, adjacencyMatrix.numCols());
 		for(Map.Entry<String,Integer> entry : adjacencyMatrix.getRowLabelMap().entrySet()){
 			String e = entry.getKey();
-- 
GitLab


From b6b2d1b76e0b32f92abb3cc23f145157fa15b457 Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Wed, 8 Feb 2023 17:29:01 +0100
Subject: [PATCH 04/20] add normalization

Also rename runAnalysis() to run() and remove the normalizedSum
aggregation strategy.

---
 .../analyze/PathwayTopologyAnalysis.java      | 55 ++++++++++++++-----
 1 file changed, 41 insertions(+), 14 deletions(-)

diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
index bcc67669f..2d7f37017 100644
--- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
+++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
@@ -12,6 +12,7 @@ import java.util.*;
 import java.util.function.BiFunction;
 import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * Class to aggregate metabolites' centrality into pathway score for Pathway Topology Analysis
@@ -22,6 +23,8 @@ public class PathwayTopologyAnalysis {
     private CompoundGraph g;
     private Map<BioMetabolite,Double> data;
 
+    private boolean normalize;
+
     /**
      * Instantiate class to perform Pathway Topology Analysis
      * @param knowledgeBase Map affiliating metabolites to pathways
@@ -65,6 +68,7 @@ public class PathwayTopologyAnalysis {
         this.data=compoundOfInterest.stream().
                 collect(Collectors.toMap(c -> c, c -> defaultValue));
     }
+
     /**
      * Instantiate class to perform Pathway Topology Analysis
      * @param knowledgeBase Map affiliating metabolites to pathways
@@ -79,14 +83,48 @@ public class PathwayTopologyAnalysis {
                 collect(Collectors.toMap(c -> c, c -> defaultValue));
     }
 
+    /**
+     * Use normalized score, using the ratio between the raw pathway score and the maximum score by pathway
+     * @return a PathwayTopologyAnalysis object
+     */
+    public PathwayTopologyAnalysis useNormalization(){
+        this.normalize=true;
+        return this;
+    }
+
     /**
      * Method to compute Pathway Impact from data, according to a given strategy for individual scoring and aggregation
      * @param scoring an IndividualScoringStrategy for scoring metabolites (centrality measure)
      * @param aggregation an AggregationStrategy for aggregating component scores into pathway score.
      * @return a Map of Pathways and their score
      */
-    public Map<BioPathway,Double> runAnalysis(IndividualScoringStrategy scoring, AggregationStrategy aggregation){
-        //From input data and given interaction network, compute topology score for each compound, using scoring strategy.
+    public Map<BioPathway,Double> run(IndividualScoringStrategy scoring, AggregationStrategy aggregation){
+        Map<BioPathway,Double> pathwayFinalScore = computePathwayScore(data, g, scoring, aggregation);
+        if(normalize){
+            //create background data (i.e dataset with all compounds)
+            Set<BioMetabolite> background = kb.values().stream().flatMap(Collection::stream)
+                    .collect(Collectors.toSet());
+            Map<BioMetabolite, Double> backgroundDefaultValues =  background
+                    .stream().collect(Collectors.toMap(k -> k, k-> 1.0/background.size()));
+
+            //compute pathway score as if all their compounds were in dataset (recompute individual score for all compounds)
+            Map<BioPathway,Double> byPathwayBackgroundScore = computePathwayScore(backgroundDefaultValues, g, scoring, aggregation);
+
+            //set final pathway score as ratio between pathway score computed from data and theoretical maximal pathway score
+            pathwayFinalScore = Stream.concat(pathwayFinalScore.entrySet().stream(), byPathwayBackgroundScore.entrySet().stream())
+                    .collect(Collectors.toMap(
+                            Map.Entry::getKey,
+                            Map.Entry::getValue,
+                            (value1, value2) -> value1/value2));
+        }
+        return pathwayFinalScore;
+    }
+
+    /*
+    Compute topology pathway analysis
+    */
+    private Map<BioPathway,Double> computePathwayScore(Map<BioMetabolite, Double> data, CompoundGraph g, IndividualScoringStrategy scoring, AggregationStrategy aggregation){
+        //From compounds and given interaction network, compute topology score for each compound, using scoring strategy.
         Map<BioMetabolite,Double> individualScore = scoring.apply(data,g);
         //From knowledge base, get the pathway memberships and collect component's scores.
         Map<BioPathway,Collection<Double>> pathwayScores = individualScoresByPathway(individualScore);
@@ -98,7 +136,7 @@ public class PathwayTopologyAnalysis {
     /*
     From associated compound given by knowledge base, retrieve list of components' scores for each pathway
      */
-    private HashMap<BioPathway,Collection<Double>> individualScoresByPathway(Map<BioMetabolite,Double> individualScore){
+    private Map<BioPathway,Collection<Double>> individualScoresByPathway(Map<BioMetabolite,Double> individualScore){
         HashMap<BioPathway,Collection<Double>> pathwayScores = new HashMap<>();
         for(Map.Entry<BioPathway,Collection<BioMetabolite>> pathwayEntry : kb.entrySet()){
             List<Double> componentsScore = pathwayEntry.getValue().stream()
@@ -199,16 +237,5 @@ public class PathwayTopologyAnalysis {
                 return pathwayFinalScore;
             };
         }
-
-
-        static AggregationStrategy normalizedSum(){
-            return (Map<BioPathway,Collection<Double>> pathwayScores) ->
-            {
-                Map<BioPathway,Double> pathwayRawScore = rawSum().apply(pathwayScores);
-                Double sum = pathwayRawScore.values().stream().reduce(0.0, (x,y) -> x+y);
-                return pathwayRawScore.entrySet().stream()
-                        .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()/sum));
-            };
-        }
     }
 }
-- 
GitLab


From 1847d29afa0f477e5e2a3f0ee5e47bf69aa7812c Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Thu, 6 Jul 2023 17:01:15 +0200
Subject: [PATCH 05/20] fix betweenness scoring

---
 .../computation/analyze/PathwayTopologyAnalysis.java            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
index 2d7f37017..7f1f44007 100644
--- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
+++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
@@ -163,7 +163,7 @@ public class PathwayTopologyAnalysis {
             Map<BioMetabolite, Integer> betweenness = centralityAnalyser.getGeodesicBetweenness();
             return betweenness.entrySet().stream()
                     .filter(e -> data.containsKey(e.getKey()))
-                    .collect(Collectors.toMap(e -> e.getKey(), e -> Double.valueOf(data.get(e.getKey()))));
+                    .collect(Collectors.toMap(e -> e.getKey(), e -> Double.valueOf(betweenness.get(e.getKey()))));
             };
         }
 
-- 
GitLab


From 28716e86e7aa1d57bf8886d93b2ba84174cadb89 Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Thu, 6 Jul 2023 17:07:58 +0200
Subject: [PATCH 06/20] REMOVE DATA WEIGHT

---
 .../analyze/PathwayTopologyAnalysis.java      | 59 ++++---------------
 1 file changed, 13 insertions(+), 46 deletions(-)

diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
index 7f1f44007..ef4dd399b 100644
--- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
+++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
@@ -21,7 +21,7 @@ public class PathwayTopologyAnalysis {
 
     private Map<BioPathway,Collection<BioMetabolite>> kb;
     private CompoundGraph g;
-    private Map<BioMetabolite,Double> data;
+    private Set<BioMetabolite> data;
 
     private boolean normalize;
 
@@ -29,27 +29,12 @@ public class PathwayTopologyAnalysis {
      * Instantiate class to perform Pathway Topology Analysis
      * @param knowledgeBase Map affiliating metabolites to pathways
      * @param topology CompoundGraph storing metabolites' relationships
-     * @param compoundData Map containing input data (metabolites abundance for example)
+     * @param compoundOfInterest Set containing input data (significantly overrepresented metabolites for example)
      */
-    public PathwayTopologyAnalysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){
+    public PathwayTopologyAnalysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){
         this.kb=knowledgeBase;
         this.g = topology;
-        this.data=compoundData;
-    }
-
-    /**
-     * Instantiate class to perform Pathway Topology Analysis
-     * @param knowledgeBase BioNetwork affiliating metabolites to pathways
-     * @param topology CompoundGraph storing metabolites' relationships
-     * @param compoundData Map containing input data (metabolites abundance for example)
-     */
-    public PathwayTopologyAnalysis(BioNetwork knowledgeBase, CompoundGraph topology, Map<BioMetabolite,Double> compoundData){
-        this.kb=new HashMap<>();
-        for(BioPathway p : knowledgeBase.getPathwaysView()){
-            kb.put(p,knowledgeBase.getMetabolitesFromPathway(p));
-        }
-        this.g = topology;
-        this.data=compoundData;
+        this.data=compoundOfInterest;
     }
 
     /**
@@ -64,23 +49,7 @@ public class PathwayTopologyAnalysis {
             kb.put(p,knowledgeBase.getMetabolitesFromPathway(p));
         }
         this.g = topology;
-        Double defaultValue = 1.0/compoundOfInterest.size();
-        this.data=compoundOfInterest.stream().
-                collect(Collectors.toMap(c -> c, c -> defaultValue));
-    }
-
-    /**
-     * Instantiate class to perform Pathway Topology Analysis
-     * @param knowledgeBase Map affiliating metabolites to pathways
-     * @param topology CompoundGraph storing metabolites' relationships
-     * @param compoundOfInterest Set containing input data (significantly overrepresented metabolites for example)
-     */
-    public PathwayTopologyAnalysis(Map<BioPathway,Collection<BioMetabolite>> knowledgeBase, CompoundGraph topology, Set<BioMetabolite> compoundOfInterest){
-        this.kb=knowledgeBase;
-        this.g = topology;
-        Double defaultValue = 1.0/compoundOfInterest.size();
-        this.data=compoundOfInterest.stream().
-                collect(Collectors.toMap(c -> c, c -> defaultValue));
+        this.data=compoundOfInterest;
     }
 
     /**
@@ -104,11 +73,9 @@ public class PathwayTopologyAnalysis {
             //create background data (i.e dataset with all compounds)
             Set<BioMetabolite> background = kb.values().stream().flatMap(Collection::stream)
                     .collect(Collectors.toSet());
-            Map<BioMetabolite, Double> backgroundDefaultValues =  background
-                    .stream().collect(Collectors.toMap(k -> k, k-> 1.0/background.size()));
 
             //compute pathway score as if all their compounds were in dataset (recompute individual score for all compounds)
-            Map<BioPathway,Double> byPathwayBackgroundScore = computePathwayScore(backgroundDefaultValues, g, scoring, aggregation);
+            Map<BioPathway,Double> byPathwayBackgroundScore = computePathwayScore(background, g, scoring, aggregation);
 
             //set final pathway score as ratio between pathway score computed from data and theoretical maximal pathway score
             pathwayFinalScore = Stream.concat(pathwayFinalScore.entrySet().stream(), byPathwayBackgroundScore.entrySet().stream())
@@ -123,7 +90,7 @@ public class PathwayTopologyAnalysis {
     /*
     Compute topology pathway analysis
     */
-    private Map<BioPathway,Double> computePathwayScore(Map<BioMetabolite, Double> data, CompoundGraph g, IndividualScoringStrategy scoring, AggregationStrategy aggregation){
+    private Map<BioPathway,Double> computePathwayScore(Set<BioMetabolite> data, CompoundGraph g, IndividualScoringStrategy scoring, AggregationStrategy aggregation){
         //From compounds and given interaction network, compute topology score for each compound, using scoring strategy.
         Map<BioMetabolite,Double> individualScore = scoring.apply(data,g);
         //From knowledge base, get the pathway memberships and collect component's scores.
@@ -151,18 +118,18 @@ public class PathwayTopologyAnalysis {
     /**
      * Interface for individual scoring strategy, computing metabolites impact
      */
-    public interface IndividualScoringStrategy extends BiFunction<Map<BioMetabolite,Double>,CompoundGraph,Map<BioMetabolite,Double>> {
+    public interface IndividualScoringStrategy extends BiFunction<Set<BioMetabolite>,CompoundGraph,Map<BioMetabolite,Double>> {
 
         /**
          * Use betweenness as scoring function, i.e. the proportion of shortest paths passing through a given node (excluding paths where it is the starting or ending node).
          * @return a map of compounds and their respective impact score.
          */
         static IndividualScoringStrategy betweenness(){
-            return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> {
+            return  (Set<BioMetabolite> data,CompoundGraph graph) -> {
             PathBasedCentrality<BioMetabolite, ReactionEdge,CompoundGraph> centralityAnalyser = new PathBasedCentrality<>(graph);
             Map<BioMetabolite, Integer> betweenness = centralityAnalyser.getGeodesicBetweenness();
             return betweenness.entrySet().stream()
-                    .filter(e -> data.containsKey(e.getKey()))
+                    .filter(e -> data.contains(e.getKey()))
                     .collect(Collectors.toMap(e -> e.getKey(), e -> Double.valueOf(betweenness.get(e.getKey()))));
             };
         }
@@ -184,7 +151,7 @@ public class PathwayTopologyAnalysis {
          * @return a map of compounds and their respective impact score.
          */
         static IndividualScoringStrategy pageRank(Double dampingFactor, int maxIter, double tolerance){
-            return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> {
+            return  (Set<BioMetabolite> data,CompoundGraph graph) -> {
                 EigenVectorCentrality<BioMetabolite, ReactionEdge,CompoundGraph> centralityAnalyser = new EigenVectorCentrality<>(graph);
                 centralityAnalyser.addJumpProb(graph.vertexSet().stream()
                         .map(BioMetabolite::getId).collect(Collectors.toSet()), dampingFactor);
@@ -198,7 +165,7 @@ public class PathwayTopologyAnalysis {
          * @return a map of compounds and their respective impact score.
          */
         static IndividualScoringStrategy outDegree(){
-            return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream()
+            return  (Set<BioMetabolite> data,CompoundGraph graph) -> data.stream()
                     .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.outDegreeOf(v))));
         }
 
@@ -208,7 +175,7 @@ public class PathwayTopologyAnalysis {
          * @return a map of compounds and their respective impact score.
          */
         static IndividualScoringStrategy neighbors(){
-            return  (Map<BioMetabolite,Double> data,CompoundGraph graph) -> data.keySet().stream()
+            return  (Set<BioMetabolite> data,CompoundGraph graph) -> data.stream()
                     .collect(Collectors.toMap(v -> v, v -> Double.valueOf(graph.neighborListOf(v).size())));
         }
 
-- 
GitLab


From c5d9d69c4cc6ace3a01661928cb2966576a5f7a4 Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Thu, 6 Jul 2023 17:46:18 +0200
Subject: [PATCH 07/20] add filter to pagerank individual score (otherwise
 return whole score map)

---
 .../computation/analyze/PathwayTopologyAnalysis.java          | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
index ef4dd399b..15455dc73 100644
--- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
+++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
@@ -156,7 +156,9 @@ public class PathwayTopologyAnalysis {
                 centralityAnalyser.addJumpProb(graph.vertexSet().stream()
                         .map(BioMetabolite::getId).collect(Collectors.toSet()), dampingFactor);
                 Map<String, Double> pageRank = centralityAnalyser.computePowerMethodPageRank(dampingFactor,maxIter,tolerance);
-                return pageRank.entrySet().stream().collect(Collectors.toMap(e -> graph.getVertex(e.getKey()), e -> e.getValue()));
+                return pageRank.entrySet().stream()
+                        .filter(e -> data.stream().map(BioMetabolite::getId).collect(Collectors.toSet()).contains(e.getKey()))
+                        .collect(Collectors.toMap(e -> graph.getVertex(e.getKey()), e -> e.getValue()));
             };
         }
 
-- 
GitLab


From 50487540f5c8106403ba43734d444ca5d6388ef0 Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Thu, 6 Jul 2023 17:46:35 +0200
Subject: [PATCH 08/20] add unit tests

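Cover betweenness, out-degree, neighbors and PageRank scoring with raw-sum aggregation, score normalization, and the map-based constructor on a toy network.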
---
 .../TestPathwayTopologyAnalysis.java          | 266 ++++++++++++++++++
 1 file changed, 266 insertions(+)
 create mode 100644 met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java

diff --git a/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java
new file mode 100644
index 000000000..c84d30850
--- /dev/null
+++ b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java
@@ -0,0 +1,266 @@
+package fr.inrae.toulouse.metexplore.met4j_graph;
+
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.*;
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.PathwayTopologyAnalysis;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality.EigenVectorCentrality;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.centrality.PathBasedCentrality;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestPathwayTopologyAnalysis {
+
+    public static CompoundGraph toyGraph;
+    public static BioNetwork toyNetwork;
+    public static BioMetabolite a, b, c, d, e, f, h, g;
+    public static BioReaction r1,r2,r3,r4,r5,r6,r7,r8;
+    public static BioPathway x,y,z;
+    public static BioCompartment na;
+    public static PathBasedCentrality<BioMetabolite, ReactionEdge, CompoundGraph> toyMeasure;
+
+    @BeforeClass
+    public static void init() {
+
+        a = new BioMetabolite("a");
+        b = new BioMetabolite("b");
+        c = new BioMetabolite("c");
+        d = new BioMetabolite("d");
+        e = new BioMetabolite("e");
+        f = new BioMetabolite("f");
+        h = new BioMetabolite("h");
+        g = new BioMetabolite("g");
+
+        r1 = new BioReaction("r1");r1.setReversible(true);
+        r2 = new BioReaction("r2");r2.setReversible(true);
+        r3 = new BioReaction("r3");r3.setReversible(true);
+        r4 = new BioReaction("r4");r4.setReversible(true);
+        r5 = new BioReaction("r5");r5.setReversible(true);
+        r6 = new BioReaction("r6");r6.setReversible(true);
+        r7 = new BioReaction("r7");r7.setReversible(true);
+        r8 = new BioReaction("r8");r8.setReversible(true);
+
+        x = new BioPathway("x");
+        y = new BioPathway("y");
+        z = new BioPathway("z");
+
+        na = new BioCompartment("NA");
+
+        ReactionEdge ab = new ReactionEdge(a, b, r1);
+        ReactionEdge ba = new ReactionEdge(b, a, r1);
+        ReactionEdge bc = new ReactionEdge(b, c, r2);
+        ReactionEdge cb = new ReactionEdge(c, b, r2);
+        ReactionEdge cd = new ReactionEdge(c, d, r3);
+        ReactionEdge dc = new ReactionEdge(d, c, r3);
+        ReactionEdge de = new ReactionEdge(d, e, r4);
+        ReactionEdge ed = new ReactionEdge(e, d, r4);
+        ReactionEdge ec = new ReactionEdge(e, c, r5);
+        ReactionEdge ce = new ReactionEdge(c, e, r5);
+        ReactionEdge af = new ReactionEdge(a, f, r6);
+        ReactionEdge fa = new ReactionEdge(f, a, r6);
+        ReactionEdge fg = new ReactionEdge(f, g, r7);
+        ReactionEdge gf = new ReactionEdge(g, f, r7);
+        ReactionEdge ga = new ReactionEdge(g, a, r8);
+        ReactionEdge ag = new ReactionEdge(a, g, r8);
+
+        toyNetwork=new BioNetwork();
+        toyNetwork.add(a,b,c,d,e,f,g);
+        toyNetwork.add(r1,r2,r3,r4,r5,r6,r7,r8);
+        toyNetwork.add(x,y,z);
+        toyNetwork.add(na);
+
+        toyNetwork.affectToCompartment(na,a,b,c,d,e,f,g);
+
+        toyNetwork.affectLeft(r1,1.0,na,a);
+        toyNetwork.affectRight(r1,1.0,na,b);
+        toyNetwork.affectLeft(r2,1.0,na,b);
+        toyNetwork.affectRight(r2,1.0,na,c);
+        toyNetwork.affectLeft(r3,1.0,na,c);
+        toyNetwork.affectRight(r3,1.0,na,d);
+        toyNetwork.affectLeft(r4,1.0,na,d);
+        toyNetwork.affectRight(r4,1.0,na,e);
+        toyNetwork.affectLeft(r5,1.0,na,e);
+        toyNetwork.affectRight(r5,1.0,na,c);
+        toyNetwork.affectLeft(r6,1.0,na,a);
+        toyNetwork.affectRight(r6,1.0,na,f);
+        toyNetwork.affectLeft(r7,1.0,na,f);
+        toyNetwork.affectRight(r7,1.0,na,g);
+        toyNetwork.affectLeft(r8,1.0,na,g);
+        toyNetwork.affectRight(r8,1.0,na,a);
+
+        toyNetwork.affectToPathway(x,r1,r6,r7,r8);
+        toyNetwork.affectToPathway(y,r2);
+        toyNetwork.affectToPathway(z,r3,r4,r5);
+
+        toyGraph = new CompoundGraph();
+        toyGraph.addVertex(a);
+        toyGraph.addVertex(b);
+        toyGraph.addVertex(c);
+        toyGraph.addVertex(d);
+        toyGraph.addVertex(e);
+        toyGraph.addVertex(f);
+        toyGraph.addVertex(g);
+        toyGraph.addEdge(a, b, ab);
+        toyGraph.addEdge(b, c, bc);
+        toyGraph.addEdge(c, d, cd);
+        toyGraph.addEdge(d, e, de);
+        toyGraph.addEdge(e, c, ec);
+        toyGraph.addEdge(a, f, af);
+        toyGraph.addEdge(f, g, fg);
+        toyGraph.addEdge(g, a, ga);
+        toyGraph.addEdge(b, a, ba);
+        toyGraph.addEdge(c, b, cb);
+        toyGraph.addEdge(d, c, dc);
+        toyGraph.addEdge(e, d, ed);
+        toyGraph.addEdge(c, e, ce);
+        toyGraph.addEdge(f, a, fa);
+        toyGraph.addEdge(g, f, gf);
+        toyGraph.addEdge(a, g, ag);
+
+        toyMeasure = new PathBasedCentrality<>(toyGraph);
+    }
+
+    @Test
+    public void testBetweenness() {
+
+        HashSet<BioMetabolite> noi = new HashSet<>();
+        noi.add(a);
+        noi.add(b);
+        noi.add(e);
+        PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi);
+
+        Map<BioMetabolite, Integer> toyBetweenness = toyMeasure.getGeodesicBetweenness();
+
+        Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.betweenness(),
+                PathwayTopologyAnalysis.AggregationStrategy.rawSum());
+
+        assertEquals(toyBetweenness.get(a)+toyBetweenness.get(b), res.get(x), 0.00000001);
+        assertEquals(toyBetweenness.get(b), res.get(y),  0.00000001);
+        assertEquals(toyBetweenness.get(e), res.get(z), 0.00000001);
+    }
+
+    @Test
+    public void testNormalization() {
+
+        HashSet<BioMetabolite> noi = new HashSet<>();
+        noi.add(a);
+        noi.add(b);
+        noi.add(e);
+        PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi).useNormalization();
+
+        Map<BioMetabolite, Integer> toyBetweenness = toyMeasure.getGeodesicBetweenness();
+
+        Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.betweenness(),
+                PathwayTopologyAnalysis.AggregationStrategy.rawSum());
+
+
+        assertEquals(Double.valueOf(toyBetweenness.get(a)+toyBetweenness.get(b)) / Double.valueOf(toyBetweenness.get(a)+toyBetweenness.get(b)+toyBetweenness.get(f)+toyBetweenness.get(g)), res.get(x), 0.00000001);
+        assertEquals(Double.valueOf(toyBetweenness.get(b)) / Double.valueOf(toyBetweenness.get(b)+toyBetweenness.get(c)), res.get(y),  0.00000001);
+        assertEquals(Double.valueOf(toyBetweenness.get(e)) / Double.valueOf(toyBetweenness.get(c)+toyBetweenness.get(d)+toyBetweenness.get(e)), res.get(z), 0.00000001);
+    }
+
+    @Test
+    public void testBetweennessII() {
+
+        HashSet<BioMetabolite> noi = new HashSet<>();
+        noi.add(a);
+        noi.add(b);
+        PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi);
+
+        Map<BioMetabolite, Integer> toyBetweenness = toyMeasure.getGeodesicBetweenness();
+
+        Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.betweenness(),
+                PathwayTopologyAnalysis.AggregationStrategy.rawSum());
+
+        assertEquals(toyBetweenness.get(a)+toyBetweenness.get(b), res.get(x), 0.00000001);
+        assertEquals(toyBetweenness.get(b), res.get(y),  0.00000001);
+        assertEquals(0, res.get(z), 0.00000001);
+    }
+
+    @Test
+    public void testOutDegree() {
+
+        HashSet<BioMetabolite> noi = new HashSet<>();
+        noi.add(a);
+        noi.add(b);
+        noi.add(e);
+        PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi);
+
+        Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.outDegree(),
+                PathwayTopologyAnalysis.AggregationStrategy.rawSum());
+
+        assertEquals(5, res.get(x), 0.00000001);
+        assertEquals(2, res.get(y),  0.00000001);
+        assertEquals(2,res.get(z),  0.00000001);
+    }
+
+    @Test
+    public void testMapConstructor() {
+
+        HashSet<BioMetabolite> noi = new HashSet<>();
+        noi.add(a);
+        noi.add(b);
+        noi.add(e);
+        HashMap<BioPathway, Collection<BioMetabolite>> kb = new HashMap<>();
+        BioCollection<BioMetabolite> xCpds = new BioCollection<>();xCpds.add(g,f,a,b);kb.put(x,xCpds);
+        BioCollection<BioMetabolite> yCpds = new BioCollection<>();yCpds.add(b,c);kb.put(y,yCpds);
+        BioCollection<BioMetabolite> zCpds = new BioCollection<>();zCpds.add(c,e,d);kb.put(z,zCpds);
+
+
+        PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(kb,toyGraph,noi);
+
+        Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.outDegree(),
+                PathwayTopologyAnalysis.AggregationStrategy.rawSum());
+
+        assertEquals(5, res.get(x), 0.00000001);
+        assertEquals(2, res.get(y),  0.00000001);
+        assertEquals(2,res.get(z),  0.00000001);
+    }
+
+    @Test
+    public void testNeighbours() {
+
+        HashSet<BioMetabolite> noi = new HashSet<>();
+        noi.add(a);
+        noi.add(b);
+        noi.add(e);
+        PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi);
+
+        Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.neighbors(),
+                PathwayTopologyAnalysis.AggregationStrategy.rawSum());
+
+        assertEquals(5, res.get(x), 0.00000001);
+        assertEquals(2, res.get(y),  0.00000001);
+        assertEquals(2,res.get(z),  0.00000001);
+    }
+
+    @Test
+    public void testPageRank() {
+
+
+        HashSet<BioMetabolite> noi = new HashSet<>();
+        noi.add(a);
+        noi.add(b);
+        noi.add(e);
+        PathwayTopologyAnalysis analysis = new PathwayTopologyAnalysis(toyNetwork,toyGraph,noi);
+
+        EigenVectorCentrality<BioMetabolite,ReactionEdge,CompoundGraph> toyMeasure2 = new EigenVectorCentrality<>(toyGraph);
+        HashMap<String, Double> toyPageRank = toyMeasure2.computePowerMethodPageRank(0.85,15000,0.001);
+
+        Map<BioPathway,Double> res = analysis.run(PathwayTopologyAnalysis.IndividualScoringStrategy.pageRank(0.85,15000,0.001),
+                PathwayTopologyAnalysis.AggregationStrategy.rawSum());
+
+        assertEquals(toyPageRank.get(a.getId())+toyPageRank.get(b.getId()), res.get(x), 0.00000001);
+        assertEquals(toyPageRank.get(b.getId()), res.get(y),  0.00000001);
+        assertEquals(toyPageRank.get(e.getId()), res.get(z), 0.00000001);
+
+    }
+}
-- 
GitLab


From c03abad5b906823a01dcdf10c66f0e031bd75621 Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Thu, 6 Jul 2023 18:07:51 +0200
Subject: [PATCH 09/20] Stub TPA app

---
 .../TopologicalPathwayAnalysis.java           | 227 ++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java

diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
new file mode 100644
index 000000000..c5034a7b3
--- /dev/null
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -0,0 +1,227 @@
+package fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis;
+
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite;
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork;
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioPathway;
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.analyze.PathwayTopologyAnalysis;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.*;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.VertexContraction;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge;
+import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph;
+import fr.inrae.toulouse.metexplore.met4j_graph.io.NodeMapping;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.FBCParser;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.GroupPathwayParser;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.NotesParser;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.PackageParser;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumFormats;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumParameterTypes;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.Format;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.ParameterType;
+import org.kohsuke.args4j.Option;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map;
+
+public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
+
+    @Format(name= EnumFormats.Sbml)
+    @ParameterType(name= EnumParameterTypes.InputFile)
+    @Option(name = "-s", usage = "input SBML file", required = true)
+    public String inputPath = null;
+
+    @ParameterType(name= EnumParameterTypes.InputFile)
+    @Format(name= EnumFormats.Txt)
+    @Option(name = "-sc", usage = "input Side compound file (recommended)", required = false)
+    public String inputSide = null;
+
+    @ParameterType(name= EnumParameterTypes.OutputFile)
+    @Format(name= EnumFormats.Gml)
+    @Option(name = "-o", usage = "output Graph file", required = true)
+    public String outputPath = null;
+
+    enum strategy {no, by_name,by_id}
+    @Option(name = "-mc", aliases = {"--mergecomp"}, usage = "merge compartments. " +
+            "Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form \"xxx_y\" with xxx as base identifier and y as compartment label).")
+    public strategy mergingStrat = strategy.no;
+    public String idRegex = "^(\\w+)_\\w$";
+
+
+    @Option(name = "-ri", aliases = {"--removeIsolatedNodes"}, usage = "remove isolated nodes", required = false)
+    public boolean removeIsolated = false;
+
+    @ParameterType(name=EnumParameterTypes.InputFile)
+    @Format(name=EnumFormats.Tsv)
+    @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs", forbids = {"-dw"})
+    public String weightFile = null;
+
+    @Option(name = "-un", aliases = {"--undirected"}, usage = "create as undirected", required = false)
+    public boolean undirected = false;
+
+
+    @Format(name= EnumFormats.Txt)
+    @ParameterType(name= EnumParameterTypes.InputFile)
+    @Option(name = "-noi", usage = "input Node of interest file", required = true)
+    public String dataPath = null;
+
+    @Option(name = "-out", aliases = {"--outDegree"}, usage = "use out-degree as scoring function instead of betweenness", required = false)
+    public boolean out = false;
+
+
+    public static void main(String[] args)  {
+
+        TopologicalPathwayAnalysis app = new TopologicalPathwayAnalysis();
+
+        app.parseArguments(args);
+
+        app.run();
+
+    }
+
+
+    public void run() {
+        //open file
+        FileWriter fw = null;
+        try {
+            fw = new FileWriter(outputPath);
+        } catch (IOException e) {
+            System.err.println("Error while opening the output file");
+            System.err.println(e.getMessage());
+            System.exit(1);
+        }
+
+        System.out.print("Reading SBML...");
+        JsbmlReader reader = new JsbmlReader(this.inputPath);
+        ArrayList<PackageParser> pkgs = new ArrayList<>(Arrays.asList(
+                new NotesParser(false), new FBCParser(), new GroupPathwayParser()));
+
+        BioNetwork network = null;
+
+        try {
+            network = reader.read(pkgs);
+        } catch (Met4jSbmlReaderException e) {
+            System.err.println("Error while reading the SBML file");
+            System.err.println(e.getMessage());
+            System.exit(1);
+        }
+        System.out.println(" Done.\n\n");
+
+
+        System.out.print("Buildinig Network...");
+        Bionetwork2BioGraph builder = new Bionetwork2BioGraph(network);
+        CompoundGraph graph = builder.getCompoundGraph();
+
+        //Graph processing: side compound removal [optional]
+        if (inputSide != null) {
+            System.err.println("removing side compounds...");
+            NodeMapping<BioMetabolite, ReactionEdge, CompoundGraph> mapper = new NodeMapping<>(graph).skipIfNotFound();
+            BioCollection<BioMetabolite> sideCpds = null;
+            try {
+                sideCpds = mapper.map(inputSide);
+            } catch (IOException e) {
+                System.err.println("Error while reading the side compound file");
+                System.err.println(e.getMessage());
+                System.exit(1);
+            }
+            boolean removed = graph.removeAllVertices(sideCpds);
+            if (removed) System.err.println(sideCpds.size() + " compounds removed.");
+        }
+
+        //Graph processing: set weights [optional]
+        WeightingPolicy<BioMetabolite, ReactionEdge, CompoundGraph> wp = new UnweightedPolicy<>();
+        if (weightFile != null) {
+            System.err.println("Setting edge weights...");
+            wp = new WeightsFromFile(weightFile);
+            wp.setWeight(graph);
+            System.out.println(" Done.");
+        }else{
+            wp.setWeight(graph);
+        }
+
+        //invert graph as undirected (copy edge weight to reversed edge)
+        if(undirected){
+            System.out.print("Create Undirected...");
+            graph.asUndirected();
+            System.out.println(" Done.");
+        }
+
+        //merge compartment
+        if(mergingStrat!=strategy.no){
+            System.out.print("Merging compartments...");
+            VertexContraction vc = new VertexContraction();
+            VertexContraction.Mapper merger = mergingStrat.equals(strategy.by_name) ? new VertexContraction.MapByName() : new VertexContraction.MapByIdSubString(idRegex);
+            graph = vc.decompartmentalize(graph, merger);
+            System.out.println(" Done.");
+        }
+
+        //remove isolated nodes
+        if(removeIsolated){
+            System.out.println("Remove isolated nodes...");
+            HashSet<BioMetabolite> nodes = new HashSet<>(graph.vertexSet());
+            graph.removeIsolatedNodes();
+            nodes.removeAll(graph.vertexSet());
+            for(BioMetabolite n : nodes){
+                System.out.println("\tremoving " + n.getName());
+            }
+            System.out.println(" Done.");
+        }
+
+        System.out.println(" Network successfully created.\n\n");
+
+        System.out.println("Importing nodes of interest");
+        NodeMapping<BioMetabolite, ReactionEdge, CompoundGraph> mapper = new NodeMapping<>(graph).throwErrorIfNotFound();
+        HashSet<BioMetabolite> data = null;
+        try {
+            data = new HashSet<>(mapper.map(dataPath));
+        } catch (IOException e) {
+            System.err.println("Error while reading the source metabolite file");
+            System.err.println(e.getMessage());
+            System.exit(1);
+        }
+        System.out.println("Computing Pathway topology Analysis");
+
+
+        System.out.println("Computing Pathway topology Analysis");
+        PathwayTopologyAnalysis computor = new PathwayTopologyAnalysis(network,graph,data).useNormalization();
+        PathwayTopologyAnalysis.IndividualScoringStrategy strat = out ? PathwayTopologyAnalysis.IndividualScoringStrategy.outDegree() : PathwayTopologyAnalysis.IndividualScoringStrategy.betweenness() ;
+        Map<BioPathway, Double> res = computor.run(strat,PathwayTopologyAnalysis.AggregationStrategy.rawSum());
+        System.out.println("Computing Pathway topology Analysis");
+
+        //export results
+        System.out.print("Exporting...");
+        try {
+            for (Map.Entry<BioPathway, Double> e : res.entrySet()) {
+                BioPathway p = e.getKey();
+                fw.write(p.getId() + "\t" + p.getName() + "\t" + e.getValue() + "\n");
+            }
+            fw.close();
+        } catch (IOException e) {
+            System.err.println("Error while writing the result file");
+            System.err.println(e.getMessage());
+            System.exit(1);
+        }
+        System.err.println("Done.");
+        return;
+    }
+
+    @Override
+    public String getLabel() {return this.getClass().getSimpleName();}
+
+    @Override
+    public String getLongDescription() {
+        return "";
+    }
+
+    @Override
+    public String getShortDescription() {return "";}
+}
+
-- 
GitLab


From 68743f650ab13333e60db3ef2bc9743e5e7f7fad Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Fri, 7 Jul 2023 16:55:17 +0200
Subject: [PATCH 10/20] improve doc, add description

---
 .../TopologicalPathwayAnalysis.java                | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
index c5034a7b3..89430fb54 100644
--- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -46,7 +46,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
 
     @ParameterType(name= EnumParameterTypes.OutputFile)
     @Format(name= EnumFormats.Gml)
-    @Option(name = "-o", usage = "output Graph file", required = true)
+    @Option(name = "-o", usage = "output result file", required = true)
     public String outputPath = null;
 
     enum strategy {no, by_name,by_id}
@@ -61,7 +61,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
 
     @ParameterType(name=EnumParameterTypes.InputFile)
     @Format(name=EnumFormats.Tsv)
-    @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs", forbids = {"-dw"})
+    @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs. Edges not found in file will be removed", forbids = {"-dw"})
     public String weightFile = null;
 
     @Option(name = "-un", aliases = {"--undirected"}, usage = "create as undirected", required = false)
@@ -140,7 +140,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
         WeightingPolicy<BioMetabolite, ReactionEdge, CompoundGraph> wp = new UnweightedPolicy<>();
         if (weightFile != null) {
             System.err.println("Setting edge weights...");
-            wp = new WeightsFromFile(weightFile);
+            wp = new WeightsFromFile(weightFile).removeEdgeNotInFile();
             wp.setWeight(graph);
             System.out.println(" Done.");
         }else{
@@ -218,10 +218,14 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
 
     @Override
     public String getLongDescription() {
-        return "";
+        return "Run a Topological Pathway Analysis (TPA) to identify key pathways based on topological properties of their mapped compounds." +
+                " From a list of compounds of interest, the app computes their betweenness centrality (which quantifies how often a compound acts as an intermediary along the shortest paths between pairs of other compounds in the network," +
+                " which, if high, suggests a critical role in the overall flow within the network). Each pathway is scored according to the summed centrality of its metabolites found in the dataset." +
+                " As an alternative to betweenness, one can use the out-degree (the number of outgoing links, i.e. the number of direct metabolic products) as a criterion of importance." +
+                " TPA is complementary to statistical enrichment analysis and ensures a more meaningful interpretation of the data, by taking into account the influence of identified compounds on the structure of the pathways.";
     }
 
     @Override
-    public String getShortDescription() {return "";}
+    public String getShortDescription() {return "Run a Topological Pathway Analysis to identify key pathways based on topological properties of their constituent compounds.";}
 }
 
-- 
GitLab


From 8214b69b0e72f34166233050f5d5b6ab7a99d8b7 Mon Sep 17 00:00:00 2001
From: Ludovic Cottret <ludovic.cottret@inra.fr>
Date: Tue, 11 Jul 2023 11:46:53 +0200
Subject: [PATCH 11/20] change arguments doc

---
 .../networkAnalysis/TopologicalPathwayAnalysis.java             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
index 89430fb54..a2a3a6918 100644
--- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -46,7 +46,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
 
     @ParameterType(name= EnumParameterTypes.OutputFile)
     @Format(name= EnumFormats.Gml)
-    @Option(name = "-o", usage = "output result file", required = true)
+    @Option(name = "-o", usage = "output result file (Gml format)", required = true)
     public String outputPath = null;
 
     enum strategy {no, by_name,by_id}
-- 
GitLab


From f7ed13c8a3e8916973a46b478c0cbe410274951c Mon Sep 17 00:00:00 2001
From: Ludovic Cottret <ludovic.cottret@inra.fr>
Date: Tue, 11 Jul 2023 11:47:16 +0200
Subject: [PATCH 12/20] improve arguments doc

---
 .../networkAnalysis/TopologicalPathwayAnalysis.java             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
index a2a3a6918..93f0c473e 100644
--- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -64,7 +64,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
     @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs. Edges not found in file will be removed", forbids = {"-dw"})
     public String weightFile = null;
 
-    @Option(name = "-un", aliases = {"--undirected"}, usage = "create as undirected", required = false)
+    @Option(name = "-un", aliases = {"--undirected"}, usage = "the compound graph built from the metabolic network and used for computations will undirected, i.e. the reaction directions won't be taken into account", required = false)
     public boolean undirected = false;
 
 
-- 
GitLab


From c4e2dc39cd0efa598397d1eff755808ab6b73771 Mon Sep 17 00:00:00 2001
From: Ludovic Cottret <ludovic.cottret@inra.fr>
Date: Tue, 11 Jul 2023 11:47:32 +0200
Subject: [PATCH 13/20] improve arguments doc

---
 .../networkAnalysis/TopologicalPathwayAnalysis.java             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
index 93f0c473e..b4035ee5c 100644
--- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -70,7 +70,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
 
     @Format(name= EnumFormats.Txt)
     @ParameterType(name= EnumParameterTypes.InputFile)
-    @Option(name = "-noi", usage = "input Node of interest file", required = true)
+    @Option(name = "-noi", usage = "file containing the list of metabolites of interests (one per line)", required = true)
     public String dataPath = null;
 
     @Option(name = "-out", aliases = {"--outDegree"}, usage = "use out-degree as scoring function instead of betweenness", required = false)
-- 
GitLab


From 8e7272374c4f9bad92f0eede08dcd83b52fa5a25 Mon Sep 17 00:00:00 2001
From: Ludovic Cottret <ludovic.cottret@inra.fr>
Date: Tue, 11 Jul 2023 11:47:46 +0200
Subject: [PATCH 14/20] fix typo

---
 .../networkAnalysis/TopologicalPathwayAnalysis.java             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
index b4035ee5c..368a4f9b2 100644
--- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -116,7 +116,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
         System.out.println(" Done.\n\n");
 
 
-        System.out.print("Buildinig Network...");
+        System.out.print("Building Network...");
         Bionetwork2BioGraph builder = new Bionetwork2BioGraph(network);
         CompoundGraph graph = builder.getCompoundGraph();
 
-- 
GitLab


From c7799c11823f36c5fc84e52128097d1235992699 Mon Sep 17 00:00:00 2001
From: Ludovic Cottret <ludovic.cottret@inra.fr>
Date: Tue, 11 Jul 2023 11:47:54 +0200
Subject: [PATCH 15/20] improve arguments doc

---
 .../networkAnalysis/TopologicalPathwayAnalysis.java             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
index 368a4f9b2..0def9f53c 100644
--- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -122,7 +122,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
 
         //Graph processing: side compound removal [optional]
         if (inputSide != null) {
-            System.err.println("removing side compounds...");
+            System.out.println("Removing side compounds...");
             NodeMapping<BioMetabolite, ReactionEdge, CompoundGraph> mapper = new NodeMapping<>(graph).skipIfNotFound();
             BioCollection<BioMetabolite> sideCpds = null;
             try {
-- 
GitLab


From 63bedb26c4d8d7bfa49eb0754b52537642671a12 Mon Sep 17 00:00:00 2001
From: Ludovic Cottret <ludovic.cottret@inra.fr>
Date: Tue, 11 Jul 2023 11:48:17 +0200
Subject: [PATCH 16/20] change console output

---
 .../networkAnalysis/TopologicalPathwayAnalysis.java             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
index 0def9f53c..378db4858 100644
--- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -133,7 +133,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
                 System.exit(1);
             }
             boolean removed = graph.removeAllVertices(sideCpds);
-            if (removed) System.err.println(sideCpds.size() + " compounds removed.");
+            if (removed) System.out.println(sideCpds.size() + " compounds removed.");
         }
 
         //Graph processing: set weights [optional]
-- 
GitLab


From ac159ba17f36461100c89c3d7f0b4dedc6521952 Mon Sep 17 00:00:00 2001
From: Ludovic Cottret <ludovic.cottret@inra.fr>
Date: Tue, 11 Jul 2023 11:48:21 +0200
Subject: [PATCH 17/20] change console output

---
 .../networkAnalysis/TopologicalPathwayAnalysis.java             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
index 378db4858..0374934bd 100644
--- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -139,7 +139,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
         //Graph processing: set weights [optional]
         WeightingPolicy<BioMetabolite, ReactionEdge, CompoundGraph> wp = new UnweightedPolicy<>();
         if (weightFile != null) {
-            System.err.println("Setting edge weights...");
+            System.out.println("Setting edge weights...");
             wp = new WeightsFromFile(weightFile).removeEdgeNotInFile();
             wp.setWeight(graph);
             System.out.println(" Done.");
-- 
GitLab


From 44ea567cbb8ecc6af88228c5b1e10186159f31bf Mon Sep 17 00:00:00 2001
From: Ludovic Cottret <ludovic.cottret@inra.fr>
Date: Tue, 11 Jul 2023 11:48:25 +0200
Subject: [PATCH 18/20] change console output

---
 .../networkAnalysis/TopologicalPathwayAnalysis.java             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
index 0374934bd..60a44ee16 100644
--- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -209,7 +209,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
             System.err.println(e.getMessage());
             System.exit(1);
         }
-        System.err.println("Done.");
+        System.out.println("Done.");
         return;
     }
 
-- 
GitLab


From 63f4b97251684a35bc4a744dabad33a625fe7739 Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Tue, 11 Jul 2023 18:45:14 +0200
Subject: [PATCH 19/20] fix error for normalization with compound in kb not in
 graph + filter non mapped pathways

---
 .../computation/analyze/PathwayTopologyAnalysis.java |  6 ++++--
 .../networkAnalysis/TopologicalPathwayAnalysis.java  | 12 ++++++------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
index 15455dc73..9dbbf0aaf 100644
--- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
+++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
@@ -70,8 +70,8 @@ public class PathwayTopologyAnalysis {
     public Map<BioPathway,Double> run(IndividualScoringStrategy scoring, AggregationStrategy aggregation){
         Map<BioPathway,Double> pathwayFinalScore = computePathwayScore(data, g, scoring, aggregation);
         if(normalize){
-            //create background data (i.e dataset with all compounds)
-            Set<BioMetabolite> background = kb.values().stream().flatMap(Collection::stream)
+            //create background data (i.e dataset with all compounds in network)
+            Set<BioMetabolite> background = kb.values().stream().filter(v -> g.vertexSet().contains(v)).flatMap(Collection::stream)
                     .collect(Collectors.toSet());
 
             //compute pathway score as if all their compounds were in dataset (recompute individual score for all compounds)
@@ -91,6 +91,8 @@ public class PathwayTopologyAnalysis {
     Compute topology pathway analysis
     */
     private Map<BioPathway,Double> computePathwayScore(Set<BioMetabolite> data, CompoundGraph g, IndividualScoringStrategy scoring, AggregationStrategy aggregation){
+        //filter kb to keep only mapped pathways
+        kb = kb.entrySet().stream().filter(e -> e.getValue().stream().anyMatch(data::contains)).collect(Collectors.toMap(e->e.getKey(),e->e.getValue()));
         //From compounds and given interaction network, compute topology score for each compound, using scoring strategy.
         Map<BioMetabolite,Double> individualScore = scoring.apply(data,g);
         //From knowledge base, get the pathway memberships and collect component's scores.
diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
index 60a44ee16..5b2df5b72 100644
--- a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/TopologicalPathwayAnalysis.java
@@ -61,7 +61,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
 
     @ParameterType(name=EnumParameterTypes.InputFile)
     @Format(name=EnumFormats.Tsv)
-    @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs. Edges not found in file will be removed", forbids = {"-dw"})
+    @Option(name = "-cw", aliases = {"--customWeights"}, usage = "an optional file containing weights for compound pairs, taken into account for betweenness computation. Edges not found in file will be removed", forbids = {"-dw"})
     public String weightFile = null;
 
     @Option(name = "-un", aliases = {"--undirected"}, usage = "the compound graph built from the metabolic network and used for computations will undirected, i.e. the reaction directions won't be taken into account", required = false)
@@ -73,7 +73,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
     @Option(name = "-noi", usage = "file containing the list of metabolites of interests (one per line)", required = true)
     public String dataPath = null;
 
-    @Option(name = "-out", aliases = {"--outDegree"}, usage = "use out-degree as scoring function instead of betweenness", required = false)
+    @Option(name = "-out", aliases = {"--outDegree"}, usage = "use out-degree as scoring function instead of betweenness (faster computation)", required = false)
     public boolean out = false;
 
 
@@ -102,7 +102,7 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
         System.out.print("Reading SBML...");
         JsbmlReader reader = new JsbmlReader(this.inputPath);
         ArrayList<PackageParser> pkgs = new ArrayList<>(Arrays.asList(
-                new NotesParser(false), new FBCParser(), new GroupPathwayParser()));
+                new NotesParser(false), new GroupPathwayParser()));
 
         BioNetwork network = null;
 
@@ -187,14 +187,14 @@ public class TopologicalPathwayAnalysis extends AbstractMet4jApplication {
             System.err.println(e.getMessage());
             System.exit(1);
         }
-        System.out.println("Computing Pathway topology Analysis");
+        System.out.println("Done.");
 
 
-        System.out.println("Computing Pathway topology Analysis");
+        System.out.println("Computing Pathway topology Analysis... (may take a while)");
         PathwayTopologyAnalysis computor = new PathwayTopologyAnalysis(network,graph,data).useNormalization();
         PathwayTopologyAnalysis.IndividualScoringStrategy strat = out ? PathwayTopologyAnalysis.IndividualScoringStrategy.outDegree() : PathwayTopologyAnalysis.IndividualScoringStrategy.betweenness() ;
         Map<BioPathway, Double> res = computor.run(strat,PathwayTopologyAnalysis.AggregationStrategy.rawSum());
-        System.out.println("Computing Pathway topology Analysis");
+        System.out.println("Done.");
 
         //export results
         System.out.print("Exporting...");
-- 
GitLab


From eae8a61bd77111df3686540ae890dddbd0882a48 Mon Sep 17 00:00:00 2001
From: cfrainay <clement.frainay@inrae.fr>
Date: Wed, 12 Jul 2023 16:12:56 +0200
Subject: [PATCH 20/20] fix normalization, update test

---
 .../computation/analyze/PathwayTopologyAnalysis.java           | 3 +--
 .../metexplore/met4j_graph/TestPathwayTopologyAnalysis.java    | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
index 9dbbf0aaf..8f910ae6f 100644
--- a/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
+++ b/met4j-graph/src/main/java/fr/inrae/toulouse/metexplore/met4j_graph/computation/analyze/PathwayTopologyAnalysis.java
@@ -71,12 +71,11 @@ public class PathwayTopologyAnalysis {
         Map<BioPathway,Double> pathwayFinalScore = computePathwayScore(data, g, scoring, aggregation);
         if(normalize){
             //create background data (i.e dataset with all compounds in network)
-            Set<BioMetabolite> background = kb.values().stream().filter(v -> g.vertexSet().contains(v)).flatMap(Collection::stream)
+            Set<BioMetabolite> background = kb.values().stream().flatMap(Collection::stream).filter(v -> g.vertexSet().contains(v))
                     .collect(Collectors.toSet());
 
             //compute pathway score as if all their compounds were in dataset (recompute individual score for all compounds)
             Map<BioPathway,Double> byPathwayBackgroundScore = computePathwayScore(background, g, scoring, aggregation);
-
             //set final pathway score as ratio between pathway score computed from data and theoretical maximal pathway score
             pathwayFinalScore = Stream.concat(pathwayFinalScore.entrySet().stream(), byPathwayBackgroundScore.entrySet().stream())
                     .collect(Collectors.toMap(
diff --git a/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java
index c84d30850..1fed61424 100644
--- a/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java
+++ b/met4j-graph/src/test/java/fr/inrae/toulouse/metexplore/met4j_graph/TestPathwayTopologyAnalysis.java
@@ -16,6 +16,7 @@ import java.util.HashSet;
 import java.util.Map;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 
 public class TestPathwayTopologyAnalysis {
 
@@ -182,7 +183,7 @@ public class TestPathwayTopologyAnalysis {
 
         assertEquals(toyBetweenness.get(a)+toyBetweenness.get(b), res.get(x), 0.00000001);
         assertEquals(toyBetweenness.get(b), res.get(y),  0.00000001);
-        assertEquals(0, res.get(z), 0.00000001);
+        assertFalse(res.containsKey(z));
     }
 
     @Test
-- 
GitLab