diff --git a/CHANGELOG.md b/CHANGELOG.md
index d7af514b8e4eedaa2e634e5d1baca954f8f28380..66361e8d9394ee6090aafab3d7ee9e713449e4bf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Features
 
+[met4j-toolbox] Add Apps to compute compound graph's classical weights (degree/chemical similarity)
 [met4j-graph] Add method to create RPAIRs-like tags on compound graph's edges
 
 
diff --git a/met4j-mapping/src/main/java/fr/inrae/toulouse/metexplore/met4j_mapping/Mapper.java b/met4j-mapping/src/main/java/fr/inrae/toulouse/metexplore/met4j_mapping/Mapper.java
index 884ccb615682634237f8d9f3e0862bf91f7912ae..e41c4e390d6dbbb895648db7c889521c8a50ea63 100644
--- a/met4j-mapping/src/main/java/fr/inrae/toulouse/metexplore/met4j_mapping/Mapper.java
+++ b/met4j-mapping/src/main/java/fr/inrae/toulouse/metexplore/met4j_mapping/Mapper.java
@@ -43,7 +43,7 @@ import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.Reader;
-import java.util.Collection;
+import java.util.*;
 import java.util.function.Function;
 import java.util.regex.Pattern;
 
@@ -145,6 +145,36 @@ public class Mapper<E extends BioEntity> {
         return mapping;
     }
 
+    /**
+     * From a tabulated file with one entity identifiers column and attributes columns, return a map with the
+     * corresponding entities instances found in the network as key and the list of their attributes as value.
+     * Attributes are the remaining fields of the entity's line, in file order, once the identifier column is removed.
+     * If an entity is matched by several lines, the attributes of the last one are kept.
+     *
+     * @param reader the input stream holding identifiers and attributes, closed once read
+     * @return a map of matching entities and their attributes
+     * @throws IOException if the input stream cannot be read
+     */
+    public Map<E, List<String>> mapAttributes(Reader reader) throws IOException {
+        this.skipped = 0;
+        Map<E, List<String>> mapping = new HashMap<>();
+        // try-with-resources: the reader is closed even if parsing or lookup fails part-way through the file
+        try (BufferedReader breader = new BufferedReader(reader)) {
+            if (skipHeader) breader.readLine();
+            String line;
+            while ((line = breader.readLine()) != null) {
+                List<String> parsedLine = new ArrayList<>(Arrays.asList(line.trim().split(sep)));
+                // remove(int) returns the removed field: reads the identifier and drops its column in one step
+                String id = parsedLine.remove(col - 1);
+                E e = this.get(id);
+                if (e != null) {
+                    mapping.put(e, parsedLine);
+                }
+            }
+        }
+        return mapping;
+    }
+
     public int getNumberOfSkippedEntries(){
         return this.skipped;
     }
diff --git a/met4j-mapping/src/test/java/fr/inrae/toulouse/metexplore/met4j_mapping/MapperTest.java b/met4j-mapping/src/test/java/fr/inrae/toulouse/metexplore/met4j_mapping/MapperTest.java
index f8ed1a37b96fec72559125acca25c3d196b2b723..7716496696e45a6a9a95b23bd0ed6d8823a27b43 100644
--- a/met4j-mapping/src/test/java/fr/inrae/toulouse/metexplore/met4j_mapping/MapperTest.java
+++ b/met4j-mapping/src/test/java/fr/inrae/toulouse/metexplore/met4j_mapping/MapperTest.java
@@ -12,6 +12,7 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 
 import static org.junit.Assert.*;
 
@@ -97,6 +98,39 @@ public class MapperTest {
         }
     }
 
+    /**
+     * mapAttributes on a tabulated file with header and identifiers in second column:
+     * expects three mapped metabolites with two attributes each, and one skipped entry.
+     */
+    @Test
+    public void testMapper22(){
+        Mapper<BioMetabolite> m = new Mapper<>(bn, BioNetwork::getMetabolitesView)
+                .columnSeparator("\t")
+                .idColumn(2)
+                .skipHeader()
+                .skipIfNotFound();
+        try {
+            Map<BioMetabolite,List<String>> res = m.mapAttributes(r1);
+            assertEquals("wrong number of mapped entries", 3, res.size());
+            assertEquals("wrong number of skipped entries", 1, m.getNumberOfSkippedEntries());
+            assertTrue("metabolite in file not found", res.containsKey(a));
+            assertTrue("metabolite in file not found", res.containsKey(b));
+            assertTrue("metabolite in file not found", res.containsKey(c));
+            assertEquals("Wrong number of attributes", 2,res.get(a).size());
+            assertEquals("Wrong number of attributes", 2,res.get(b).size());
+            assertEquals("Wrong number of attributes", 2,res.get(c).size());
+            assertEquals("Wrong attribute value", "A",res.get(a).get(0));
+            assertEquals("Wrong attribute value", "0.1",res.get(a).get(1));
+            assertEquals("Wrong attribute value", "B",res.get(b).get(0));
+            assertEquals("Wrong attribute value", "0.2",res.get(b).get(1));
+            assertEquals("Wrong attribute value", "C",res.get(c).get(0));
+            assertEquals("Wrong attribute value", "0.3",res.get(c).get(1));
+        } catch (IOException e) {
+            e.printStackTrace();
+            Assert.fail("mapping failed");
+        }
+    }
+
     @Test
     public void testMapper3(){
         Mapper<BioReaction> m = new Mapper<>(bn, BioNetwork::getReactionsView).skipIfNotFound();
diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/generic/EdgeWeighting.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/generic/EdgeWeighting.java
new file mode 100644
index 0000000000000000000000000000000000000000..1cc13fb4a7d0f173511b34b849415562d811402f
--- /dev/null
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/generic/EdgeWeighting.java
@@ -0,0 +1,171 @@
+package fr.inrae.toulouse.metexplore.met4j_toolbox.generic;
+
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite;
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork;
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.ReactionProbabilityWeight;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.WeightUtils;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.EdgeMerger;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.transform.VertexContraction;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.CompoundGraph;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.compound.ReactionEdge;
+import fr.inrae.toulouse.metexplore.met4j_graph.io.Bionetwork2BioGraph;
+import fr.inrae.toulouse.metexplore.met4j_graph.io.NodeMapping;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.FBCParser;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.GroupPathwayParser;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.NotesParser;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.PackageParser;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumFormats;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumParameterTypes;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.Format;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.ParameterType;
+import org.kohsuke.args4j.Option;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+/**
+ * Abstract class for apps that provide a tabulated compound graph edge list, with one column holding the edge weight.
+ * Subclasses choose the weighting through {@link #setWeightingPolicy()} and may override
+ * {@link #processNetwork(BioNetwork)} and {@link #processCompoundGraph(CompoundGraph)} to add preprocessing.
+ */
+public abstract class EdgeWeighting extends AbstractMet4jApplication {
+
+    @Format(name= EnumFormats.Sbml)
+    @ParameterType(name= EnumParameterTypes.InputFile)
+    @Option(name = "-s", usage = "input SBML file", required = true)
+    public String inputPath = null;
+
+    @ParameterType(name= EnumParameterTypes.InputFile)
+    @Option(name = "-sc", usage = "input Side compound file", required = false)
+    public String inputSide = null;
+
+    @ParameterType(name= EnumParameterTypes.OutputFile)
+    @Option(name = "-o", usage = "output edge weight file", required = true)
+    public String outputPath = null;
+
+    enum strategy {no, by_name,by_id}
+    @Option(name = "-mc", aliases = {"--mergecomp"}, usage = "merge compartments. " +
+            "Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form \"xxx_y\" with xxx as base identifier and y as compartment label).")
+    public strategy mergingStrat = strategy.no;
+    // regular expression given to VertexContraction.MapByIdSubString when compartments are merged by identifier
+    public String idRegex = "^(\\w+)_\\w$";
+
+    @Option(name = "-me", aliases = {"--simple"}, usage = "merge parallel edges to produce a simple graph", required = false)
+    public boolean mergeEdges = false;
+
+    @Option(name = "-un", aliases = {"--undirected"}, usage = "create as undirected", required = false)
+    public boolean undirected = false;
+
+    @Option(name = "-tp", aliases = {"--transitionproba"}, usage = "set weight as random walk transition probability, normalized by reaction", required = false)
+    public boolean computeWeight = false;
+
+    @Option(name = "-nan", aliases = {"--removeNaN"}, usage = "do not output edges with undefined weight", required = false)
+    public boolean removeNaN = false;
+
+    /**
+     * Provides the weighting policy applied to the compound graph's edges by {@link #run()}.
+     * @return the weighting policy to use
+     */
+    public abstract WeightingPolicy setWeightingPolicy();
+
+    /**
+     * Reads the SBML file, builds the compound graph, weights its edges and exports the weighted edge list.
+     * Side compound removal, NaN-weighted edge removal, undirected conversion, compartment merging,
+     * transition probability computation and parallel edge merging are applied, in that order, when requested.
+     *
+     * @throws IOException if an input/output operation fails
+     * @throws Met4jSbmlReaderException if the SBML file cannot be parsed
+     */
+    public void run() throws IOException, Met4jSbmlReaderException {
+
+        WeightingPolicy wp = setWeightingPolicy();
+
+        System.out.print("Reading SBML...");
+        JsbmlReader reader = new JsbmlReader(this.inputPath);
+        ArrayList<PackageParser> pkgs = new ArrayList<>(Arrays.asList(
+                new NotesParser(false), new FBCParser(), new GroupPathwayParser()));
+        BioNetwork network = reader.read(pkgs);
+        network = processNetwork(network);
+        System.out.println(" Done.");
+
+        System.out.print("Building Network...");
+        Bionetwork2BioGraph builder = new Bionetwork2BioGraph(network);
+        CompoundGraph graph = builder.getCompoundGraph();
+        graph = processCompoundGraph(graph);
+        System.out.println(" Done.");
+
+        //Graph processing: side compound removal [optional]
+        if (inputSide != null) {
+            System.err.println("removing side compounds...");
+            NodeMapping<BioMetabolite, ReactionEdge, CompoundGraph> mapper = new NodeMapping<>(graph).skipIfNotFound();
+            BioCollection<BioMetabolite> sideCpds = mapper.map(inputSide);
+            boolean removed = graph.removeAllVertices(sideCpds);
+            if (removed) System.err.println(sideCpds.size() + " compounds removed.");
+        }
+
+        //Graph processing: set weights, then drop edges with undefined weight [optional]
+        wp.setWeight(graph);
+        if(removeNaN) WeightUtils.removeEdgeWithNaNWeight(graph);
+
+        //invert graph as undirected (copy edge weight to reversed edge)
+        if(undirected){
+            System.out.print("Create Undirected...");
+            graph.asUndirected();
+            System.out.println(" Done.");
+        }
+
+        //merge compartment
+        if(mergingStrat!= strategy.no){
+            System.out.print("Merging compartments...");
+            VertexContraction vc = new VertexContraction();
+            VertexContraction.Mapper merger = mergingStrat.equals(strategy.by_name) ? new VertexContraction.MapByName() : new VertexContraction.MapByIdSubString(idRegex);
+            graph = vc.decompartmentalize(graph, merger);
+            System.out.println(" Done.");
+        }
+
+        //compute transitions probability from weights
+        if(computeWeight) {
+            System.out.print("Compute transition matrix...");
+            ReactionProbabilityWeight wp2 = new ReactionProbabilityWeight();
+            wp2.setWeight(graph);
+            System.out.println(" Done.");
+        }
+
+        //merge parallel edges
+        if(mergeEdges){
+            System.out.print("Merging edges...");
+            EdgeMerger.mergeEdgesWithOverride(graph);
+            System.out.println(" Done.");
+        }
+
+        //export graph
+        System.out.print("Exporting...");
+        WeightUtils.export(graph,outputPath);
+        System.out.println(" Done.");
+    }
+
+    /**
+     * Hook to add optional preprocessing of the compound graph; returns the graph unchanged by default.
+     * @param graph the original compound graph
+     * @return a preprocessed graph
+     */
+    public CompoundGraph processCompoundGraph(CompoundGraph graph) {
+        return graph;
+    }
+
+    /**
+     * Hook to add optional preprocessing of SBML parsing output; returns the network unchanged by default.
+     * @param network the original network from parsed SBML
+     * @return a preprocessed network
+     */
+    public BioNetwork processNetwork(BioNetwork network) {
+        return network;
+    }
+
+}
+
diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ChemSimilarityWeighting.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ChemSimilarityWeighting.java
new file mode 100644
index 0000000000000000000000000000000000000000..5b156659397722b3282a26957edfc645a7274c29
--- /dev/null
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/ChemSimilarityWeighting.java
@@ -0,0 +1,147 @@
+package fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis;
+
+import fr.inrae.toulouse.metexplore.met4j_chemUtils.chemicalSimilarity.FingerprintBuilder;
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioMetabolite;
+import fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork;
+import fr.inrae.toulouse.metexplore.met4j_core.utils.StringUtils;
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.SimilarityWeightPolicy;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException;
+import fr.inrae.toulouse.metexplore.met4j_mapping.Mapper;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.EdgeWeighting;
+import org.kohsuke.args4j.Option;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * App that provides tabulated compound graph edge list, with one column with reactant pair's chemical similarity
+ */
+public class ChemSimilarityWeighting extends EdgeWeighting {
+
+    enum strategy {EState,Extended,KlekotaRoth,MACCS,PubChem};
+    @Option(name = "-f", aliases = {"--fingerprint"}, usage = "The chemical fingerprint to use", required = false)
+    public strategy type = strategy.Extended;
+
+    @Option(name = "-sm", aliases = {"--smileFile"}, usage = "If not present in SBML's annotations, get structure from a tabulated file with first column as compound id and second column as SMILE string, no header. Ignored if inchi file is provided", required = false)
+    public String smileFile;
+    @Option(name = "-in", aliases = {"--inchiFile"}, usage = "If not present in SBML's annotations, get structure from a tabulated file with first column as compound id and second column as InChI string, no header.", required = false)
+    public String inchiFile;
+
+    @Option(name = "-d", aliases = {"--asDist"}, usage = "Use distance rather than similarity", required = false)
+    public boolean dist = false;
+
+    /**
+     * Builds the chemical similarity weighting policy, using the fingerprint selected with {@code -f}
+     * and distance instead of similarity when {@code -d} is set.
+     * @return the configured similarity weighting policy
+     */
+    @Override
+    public WeightingPolicy setWeightingPolicy() {
+        SimilarityWeightPolicy wp = new SimilarityWeightPolicy();
+        // each case must end with a break: without it execution falls through to the last case
+        // and the PubChem fingerprint is used whatever the requested type
+        switch(type){
+            case EState:
+                wp.setFingerprintType(FingerprintBuilder.ESTATE);
+                break;
+            case Extended:
+                wp.setFingerprintType(FingerprintBuilder.EXTENDED);
+                break;
+            case KlekotaRoth:
+                wp.setFingerprintType(FingerprintBuilder.KLEKOTAROTH);
+                break;
+            case MACCS:
+                wp.setFingerprintType(FingerprintBuilder.MACCS);
+                break;
+            case PubChem:
+                wp.setFingerprintType(FingerprintBuilder.PUBCHEM);
+                break;
+        }
+        wp.useDistance(dist);
+        return wp;
+    }
+
+    /**
+     * Sets the metabolites' structures from the InChI file if provided, or else from the SMILES file if provided (rows
+     * without structure column are ignored), then reports how many metabolites hold a structure. Exits with status 1 if none.
+     * @param bn the original network from parsed SBML
+     * @return the same network, with structures set on the metabolites found in the structure file
+     */
+    @Override
+    public BioNetwork processNetwork(BioNetwork bn){
+        if(inchiFile!=null){
+            for(Map.Entry<BioMetabolite, List<String>> entry : readStructures(bn, inchiFile, "InChI").entrySet()){
+                if(!entry.getValue().isEmpty()) entry.getKey().setInchi(entry.getValue().get(0));
+            }
+        } else if (smileFile!=null) {
+            for(Map.Entry<BioMetabolite, List<String>> entry : readStructures(bn, smileFile, "SMILES").entrySet()){
+                if(!entry.getValue().isEmpty()) entry.getKey().setSmiles(entry.getValue().get(0));
+            }
+        }
+        int s = 0;
+        int i = 0;
+        for(BioMetabolite m : bn.getMetabolitesView()){
+            if(!StringUtils.isVoid(m.getSmiles())) s++;
+            if(!StringUtils.isVoid(m.getInchi())) i++;
+        }
+        System.out.println(s+"/"+bn.getMetabolitesView().size()+" metabolites with SMILE");
+        System.out.println(i+"/"+bn.getMetabolitesView().size()+" metabolites with InChI");
+        if((i+s)==0){
+            System.err.println("Error: no chemical structure provided, unable to compute chemical similarity");
+            System.exit(1);
+        }
+        return bn;
+    }
+
+    /**
+     * Reads a tab-separated structure file (compound id in first column, no header) and maps its rows to the network's metabolites.
+     * @param bn the network holding the metabolites
+     * @param path the path to the structure file
+     * @param format the structure format name, used in the error message
+     * @return for each metabolite of the file found in the network, the remaining fields of its row
+     */
+    private Map<BioMetabolite, List<String>> readStructures(BioNetwork bn, String path, String format) {
+        Mapper<BioMetabolite> m = new Mapper<>(bn, BioNetwork::getMetabolitesView)
+                .columnSeparator("\t")
+                .idColumn(1)
+                .skipIfNotFound();
+        try {
+            return m.mapAttributes(new BufferedReader(new FileReader(path)));
+        } catch (IOException e) {
+            System.err.println("Error reading " + format + " file");
+            throw new RuntimeException(e);
+        }
+    }
+
+    public static void main(String[] args) throws Met4jSbmlReaderException, IOException {
+
+        ChemSimilarityWeighting app = new ChemSimilarityWeighting();
+
+        app.parseArguments(args);
+
+        app.run();
+
+    }
+
+    @Override
+    public String getLabel() {
+        return this.getClass().getSimpleName();
+    }
+
+    @Override
+    public String getLongDescription() {
+        return "Provides tabulated compound graph edge list, with one column with reactant pair's chemical similarity." +
+                " Chemical similarity has been proposed as edge weight for finding meaningful paths in metabolic networks," +
+                " using shortest (lightest) path search. See McShan et al. 2003 (https://doi.org/10.1093/bioinformatics/btg217)," +
+                " Rahman et al. 2005 (https://doi.org/10.1093/bioinformatics/bti116) and Pertusi et al. 2014 (https://doi.org/10.1093/bioinformatics/btu760)";
+    }
+
+    @Override
+    public String getShortDescription() {
+        return "Provides tabulated compound graph edge list, with one column with reactant pair's chemical similarity.";
+    }
+}
diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/DegreeWeighting.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/DegreeWeighting.java
new file mode 100644
index 0000000000000000000000000000000000000000..a55cb91bd2435b0de47e97badb376f1f8eb77d8f
--- /dev/null
+++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/networkAnalysis/DegreeWeighting.java
@@ -0,0 +1,55 @@
+package fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis;
+
+import fr.inrae.toulouse.metexplore.met4j_graph.computation.connect.weighting.DegreeWeightPolicy;
+import fr.inrae.toulouse.metexplore.met4j_graph.core.WeightingPolicy;
+import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException;
+import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.EdgeWeighting;
+import org.kohsuke.args4j.Option;
+
+import java.io.IOException;
+
+/**
+ * App that provides tabulated compound graph edge list, with one column with target's degree.
+ */
+public class DegreeWeighting extends EdgeWeighting {
+
+    @Option(name = "-pow", aliases = {"--power"}, usage = "set weights as the degree raised to the power of number in parameter.", required = false)
+    public int pow = 1;
+
+    /**
+     * Builds the degree weighting policy, using the exponent given with {@code -pow}.
+     * @return the configured degree weighting policy
+     */
+    @Override
+    public WeightingPolicy setWeightingPolicy() {
+        return new DegreeWeightPolicy(pow);
+    }
+
+    public static void main(String[] args) throws Met4jSbmlReaderException, IOException {
+
+        DegreeWeighting app = new DegreeWeighting();
+
+        app.parseArguments(args);
+
+        app.run();
+
+    }
+
+    @Override
+    public String getLabel() {
+        return this.getClass().getSimpleName();
+    }
+
+    @Override
+    public String getLongDescription() {
+        return "Provides tabulated compound graph edge list, with one column with target's degree." +
+                " Degree has been proposed as edge weight for finding meaningful paths in metabolic networks," +
+                " using shortest (lightest) path search. See Croes et al. 2006 (https://doi.org/10.1016/j.jmb.2005.09.079) and" +
+                " Croes et al. 2005 (https://doi.org/10.1093/nar/gki437)";
+    }
+
+    @Override
+    public String getShortDescription() {
+        return "Provides tabulated compound graph edge list, with one column with target's degree.";
+    }
+}