From 07edd874d1a99cfd1ec6020687f3367bf38c1911 Mon Sep 17 00:00:00 2001 From: Nathalie Vialaneix <nathalie.vialaneix@inrae.fr> Date: Fri, 23 Aug 2024 09:57:09 +0200 Subject: [PATCH 01/19] fixed a typo in project description --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 13746f6..ae29d4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ build-backend = "setuptools.build_meta" [project] name = "NMFProfiler" version = "0.2.1" -description = "NMFProfiler: an integrative supervised Non-Negative Matrix Factorization to extract typical profiles of groupes of interest combining two different datasets." +description = "NMFProfiler: an integrative supervised Non-Negative Matrix Factorization to extract typical profiles of groups of interest combining two different datasets." readme = "README.md" authors = [ { name = "Aurélie Mercadié", email = "aurelie.mercadie@inrae.fr" }, -- GitLab From fb7b09a75b8a8d0836eb787f63351880f721efa4 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Wed, 20 Nov 2024 17:38:44 +0100 Subject: [PATCH 02/19] adapt _preprocess_data function to +2 omics (bug) --- nmfprofiler/nmfprofiler.py | 49 +++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 5b841ef..01a387b 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -935,23 +935,38 @@ class NMFProfiler: Apply a min-max normalization and divide by the square root of the number of features. """ - # First dataset - for i in range(self.omic1.shape[1]): - self.omic1[:, i] = ( - self.omic1[:, i] - np.min(self.omic1[:, i]) - ) / ( - np.max(self.omic1[:, i]) - np.min(self.omic1[:, i]) - ) - self.omic1 = self.omic1 * (1 / np.sqrt(self.omic1.shape[1])) - - # Second dataset - for i in range(self.omic2.shape[1]): - self.omic2[:, i] = ( - self.omic2[:, i] - np.min(self.omic2[:, i]) - ) / ( - np.max(self.omic2[:, i]) - np.min(self.omic2[:, i]) - ) - self.omic2 = self.omic2 * (1 / np.sqrt(self.omic2.shape[1])) + # # First dataset + # for i in range(self.omic1.shape[1]): + # self.omic1[:, i] = ( + # self.omic1[:, i] - np.min(self.omic1[:, i]) + # ) / ( + # np.max(self.omic1[:, i]) - np.min(self.omic1[:, i]) + # ) + # self.omic1 = self.omic1 * (1 / np.sqrt(self.omic1.shape[1])) + + # # Second dataset + # for i in range(self.omic2.shape[1]): + # self.omic2[:, i] = ( + # self.omic2[:, i] - np.min(self.omic2[:, i]) + # ) / ( + # np.max(self.omic2[:, i]) - np.min(self.omic2[:, i]) + # ) + # self.omic2 = self.omic2 * (1 / np.sqrt(self.omic2.shape[1])) + + omics = [self.omic1, self.omic2] + + # For each dataset + for j in range(len(omics)-1): + for i in range(omics[j].shape[1]): + omics[j][:, i] = ( + omics[j][:, i] - np.min(omics[j][:, i]) + ) / ( + np.max(omics[j][:, i]) - np.min(omics[j][:, i]) + ) + omics[j] = omics[j] * (1 / np.sqrt(omics[j].shape[1])) + + self.omic1 = omics[0] + self.omic2 = omics[1] def _initialize_w_h_beta(self): """Initialize matrices W, H^j and Beta^j. 
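Note on the hunk above: the new loop runs `range(len(omics)-1)`, which stops before the last dataset and leaves omic2 unscaled; the next patch in the series corrects it to `range(len(omics))`. As a minimal standalone sketch of the intended preprocessing (illustrative names, not the package API; assumes NumPy arrays):

    import numpy as np

    def minmax_scale_omics(omics):
        """Min-max scale each column of every omic, then divide by
        sqrt(n_features), as in _preprocess_data (sketch)."""
        scaled = []
        for X in omics:  # iterate over *all* J datasets
            X = np.asarray(X, dtype=float).copy()
            for i in range(X.shape[1]):
                col = X[:, i]
                # a constant column divides by zero here, exactly as
                # in the patched loop above
                X[:, i] = (col - col.min()) / (col.max() - col.min())
            scaled.append(X / np.sqrt(X.shape[1]))
        return scaled
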
-- GitLab From d25c6b347c6187264e4396ae5fd2657e0b2b059d Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Fri, 22 Nov 2024 16:08:54 +0100 Subject: [PATCH 03/19] generalized _update_W, _computeF, _analytic_solver and _preprocess_data functions --- nmfprofiler/nmfprofiler.py | 263 ++++++++++++++++--------------------- 1 file changed, 112 insertions(+), 151 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 01a387b..1fb8b91 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -61,12 +61,15 @@ def _update_W(W, omic1, omic2, H1, H2, mu): ------- Newly updated W. """ - B = ( - safe_sparse_dot(H1, H1.T) - + safe_sparse_dot(H2, H2.T) - + (mu * np.identity(W.shape[1])) - ) - C = safe_sparse_dot(omic1, H1.T) + safe_sparse_dot(omic2, H2.T) + omics = [omic1, omic2] + Hs = [H1, H2] + + B = mu * np.identity(W.shape[1]) + for j in range(len(Hs)): + B += safe_sparse_dot(Hs[j], Hs[j].T) + C = np.zeros((W.shape[0], W.shape[1])) + for j in range(len(Hs)): + C += safe_sparse_dot(omics[j], Hs[j].T) return multiply(W, divide(C, safe_sparse_dot(W, B))) @@ -243,24 +246,26 @@ def _computeF( Either the value of the objective function (i.e. the global loss) alone, or accompagned by each specific term to obtain it. """ - distort1 = 0.5 * (norm(omic1 - safe_sparse_dot(W, H1)) ** 2) - distort2 = 0.5 * (norm(omic2 - safe_sparse_dot(W, H2)) ** 2) + omics = [omic1, omic2] + Hs = [H1, H2] + Betas = [Beta1, Beta2] + regul = (mu / 2) * np.trace(safe_sparse_dot(W.T, W)) - sparse1 = lambdA * norm(H1, 1) - sparse2 = lambdA * norm(H2, 1) - pred1 = (gamma / 2) * ( - norm(Y - multi_dot([omic1, H1.T[:, 0:K_gp], - np.diag(Beta1[0:K_gp])])) ** 2 - ) - pred2 = (gamma / 2) * ( - norm(Y - multi_dot([omic2, H2.T[:, 0:K_gp], - np.diag(Beta2[0:K_gp])])) ** 2 - ) + distort, sparse, pred = ([] for i in range(3)) + for j in range(len(omics)): + distort.append(0.5 * (norm(omics[j] - safe_sparse_dot(W, Hs[j])) ** 2)) + sparse.append(lambdA * norm(Hs[j], 1)) + pred.append( + (gamma / 2) * ( + norm(Y - multi_dot([omics[j], Hs[j].T[:, 0:K_gp], + np.diag(Betas[j][0:K_gp])])) ** 2 + ) + ) - loss = distort1 + distort2 + regul + sparse1 + sparse2 + pred1 + pred2 + loss = sum(distort) + regul + sum(sparse) + sum(pred) if details: - res = [loss, distort1, distort2, sparse1, sparse2, regul, pred1, pred2] + res = sum([[loss], distort, sparse, [regul], pred], []) return loss if details is False else res @@ -501,131 +506,105 @@ def _analytic_solver( See (Fevotte, 2011) and sklearn.decomposition.NMF source code for choice of parameters. 
""" + omics = [omic1, omic2] + Hs = [H1, H2] + Betas = [Beta1, Beta2] + Hlosses = [H1_loss, H2_loss] + Hgrads = [H1_grad, H2_grad] + etas = [params["eta1"], params["eta2"]] # W update with new values of Hs - W_new = _update_W(W, omic1, omic2, H1, H2, params["mu"]) + W_new = _update_W(W, omics[0], omics[1], Hs[0], Hs[1], params["mu"]) # Hs updates (either MU or Proximal) - H1_new, H1_grad = _update_H( - H1, - W_new, - omic1, - Beta1, - Y, - params["gamma"], - params["eta1"], - params["lambda"], - as_sklearn=as_sklearn, - grad=True, - H_grad=H1_grad, - ) - H2_new, H2_grad = _update_H( - H2, - W_new, - omic2, - Beta2, - Y, - params["gamma"], - params["eta2"], - params["lambda"], - as_sklearn=as_sklearn, - grad=True, - H_grad=H2_grad, - ) + Hs_new = [0] * len(omics) + for j in range(len(omics)): + Hs_new[j], Hgrads[j] = _update_H( + Hs[j], + W_new, + omics[j], + Betas[j], + Y, + params["gamma"], + etas[j], + params["lambda"], + as_sklearn=as_sklearn, + grad=True, + H_grad=Hgrads[j], + ) if backtrack and not as_sklearn: - # LineSearch for H1 - H1_loss.append( - _computeMargH( - omic1, - Y, - W_new, - H1_new, - Beta1, - params["gamma"], - params["lambda"], - K_gp + # LineSearch for each H + for j in range(len(omics)): + Hlosses[j].append( + _computeMargH( + omics[j], + Y, + W_new, + Hs_new[j], + Betas[j], + params["gamma"], + params["lambda"], + K_gp + ) ) - ) - H1_new, H1_loss, params["eta1"] = _linesearch( - H_old=H1, - H=H1_new, - W=W_new, - omic=omic1, - Beta=Beta1, - Y=Y, - gamma=params["gamma"], - lambdA=params["lambda"], - current_eta=params["eta1"], - H_loss=H1_loss, - alpha=params["alpha"], - sigma=params["sigma"], - m_back=params["m_back"], - max_iter_back=max_iter_back, - K_gp=K_gp, - verbose=verbose, - ) - # LineSearch for H2 - H2_loss.append( - _computeMargH( - omic2, - Y, - W_new, - H2_new, - Beta2, - params["gamma"], - params["lambda"], - K_gp + Hs_new[j], Hlosses[j], etas[j] = _linesearch( + H_old=Hs[j], + H=Hs_new[j], + W=W_new, + omic=omics[j], + Beta=Betas[j], + Y=Y, + gamma=params["gamma"], + lambdA=params["lambda"], + current_eta=etas[j], + H_loss=Hlosses[j], + alpha=params["alpha"], + sigma=params["sigma"], + m_back=params["m_back"], + max_iter_back=max_iter_back, + K_gp=K_gp, + verbose=verbose, ) - ) - H2_new, H2_loss, params["eta2"] = _linesearch( - H_old=H2, - H=H2_new, - W=W_new, - omic=omic2, - Beta=Beta2, - Y=Y, - gamma=params["gamma"], - lambdA=params["lambda"], - current_eta=params["eta2"], - H_loss=H2_loss, - alpha=params["alpha"], - sigma=params["sigma"], - m_back=params["m_back"], - max_iter_back=max_iter_back, - K_gp=K_gp, - verbose=verbose, - ) # Betas updates with new values of Hs # Compute each regression coef. 
of each component separately # and gather them in a unique vector - Beta1_new = np.array( - [ - _update_Beta( - omic1, H1_new[0, :], Y[:, 0], params["gamma"], Beta1[0] - ), - _update_Beta( - omic1, H1_new[1, :], Y[:, 1], params["gamma"], Beta1[1] - ), - ] - ) - - Beta2_new = np.array( - [ - _update_Beta( - omic2, H2_new[0, :], Y[:, 0], params["gamma"], Beta2[0] - ), - _update_Beta( - omic2, H2_new[1, :], Y[:, 1], params["gamma"], Beta2[1] - ), - ] - ) + Betas_new = [0] * len(omics) + for j in range(len(omics)): + Betas_new[j] = np.array( + [ + _update_Beta( + omics[j], + Hs_new[j][0, :], + Y[:, 0], + params["gamma"], + Betas[j][0] + ), + _update_Beta( + omics[j], + Hs_new[j][1, :], + Y[:, 1], + params["gamma"], + Betas[j][1] + ), + ] + ) - # Put to zero all coefficients linked to components k when k > K_gp - if K_gp < K: - Beta1_new[K_gp:] = 0 - Beta2_new[K_gp:] = 0 + # Put to zero all coefficients linked to components k when k > K_gp + if K_gp < K: + Betas_new[j][K_gp:] = 0 + + H1_new = Hs_new[0] + H2_new = Hs_new[1] + Beta1_new = Betas_new[0] + Beta2_new = Betas_new[1] + params["eta1"] = etas[0] + params["eta2"] = etas[1] + H1_loss = Hlosses[0] + H2_loss = Hlosses[1] + H1_grad = Hgrads[0] + H2_grad = Hgrads[1] return ( W_new, @@ -935,28 +914,10 @@ class NMFProfiler: Apply a min-max normalization and divide by the square root of the number of features. """ - # # First dataset - # for i in range(self.omic1.shape[1]): - # self.omic1[:, i] = ( - # self.omic1[:, i] - np.min(self.omic1[:, i]) - # ) / ( - # np.max(self.omic1[:, i]) - np.min(self.omic1[:, i]) - # ) - # self.omic1 = self.omic1 * (1 / np.sqrt(self.omic1.shape[1])) - - # # Second dataset - # for i in range(self.omic2.shape[1]): - # self.omic2[:, i] = ( - # self.omic2[:, i] - np.min(self.omic2[:, i]) - # ) / ( - # np.max(self.omic2[:, i]) - np.min(self.omic2[:, i]) - # ) - # self.omic2 = self.omic2 * (1 / np.sqrt(self.omic2.shape[1])) - omics = [self.omic1, self.omic2] # For each dataset - for j in range(len(omics)-1): + for j in range(len(omics)): for i in range(omics[j].shape[1]): omics[j][:, i] = ( omics[j][:, i] - np.min(omics[j][:, i]) -- GitLab From d62779f0d93ff3439fc3ba1260e8ddb171c49378 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Fri, 22 Nov 2024 16:49:06 +0100 Subject: [PATCH 04/19] generalized _initialize_w_h_beta function --- nmfprofiler/nmfprofiler.py | 99 ++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 52 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 1fb8b91..6d57a39 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -939,6 +939,7 @@ class NMFProfiler: ---- Based on _initialize_nmf of sklearn.decomposition.NMF. """ + omicstemp = [self.omic1, self.omic2] # Extract K, the number of latent components, as equal to # the number of distinct groups in y @@ -948,8 +949,10 @@ class NMFProfiler: K_gp = len(np.unique(self.y)) # For W, H1 and H2: + # 1.0. Initialize H0s list + H0s = [0] * len(omicstemp) # 1.1. Concatenate omics data sets - omics = np.concatenate((self.omic1, self.omic2), axis=1) + omics = np.concatenate(omicstemp, axis=1) # 1.2. Initialize using sklearn function if self.init_method == "random2": # 1.2a. 
Initialize W with both omics and Hj @@ -960,29 +963,25 @@ class NMFProfiler: init="random", random_state=self.seed ) - *_, H1_0 = _initialize_nmf( - X=self.omic1, - n_components=K, - init="random", - random_state=self.seed - ) - *_, H2_0 = _initialize_nmf( - X=self.omic2, - n_components=K, - init="random", - random_state=self.seed - ) - del _ + for j in range(len(omicstemp)): + *_, H0s[j] = _initialize_nmf( + X=omicstemp[j], + n_components=K, + init="random", + random_state=self.seed + ) + del _ elif self.init_method == "random3": # 1.2b. FOR IDENTICAL OMICS DATA SETS - Initialize W # with both omics, H1 with omic1 and H2 as H1 (random) - W0, H1_0 = _initialize_nmf( - X=self.omic1, + W0, H0s[0] = _initialize_nmf( + X=omicstemp[0], n_components=K, init="random", random_state=self.seed ) - H2_0 = H1_0.copy() + for j in range(1, len(omicstemp)): + H0s[j] = H0s[0].copy() elif self.init_method == "nndsvd2": # 1.2c. Initialize W with both omics and Hj # with specific omic (nndsvd) @@ -992,36 +991,27 @@ class NMFProfiler: init="nndsvd", random_state=self.seed ) - *_, H1_0 = _initialize_nmf( - X=self.omic1, - n_components=K, - init="nndsvd", - random_state=self.seed - ) - *_, H2_0 = _initialize_nmf( - X=self.omic2, - n_components=K, - init="nndsvd", - random_state=self.seed - ) - del _ + for j in range(len(omicstemp)): + *_, H0s[j] = _initialize_nmf( + X=omicstemp[j], + n_components=K, + init="nndsvd", + random_state=self.seed + ) + del _ elif self.init_method == "nndsvd3": # 1.2d. Initialize W with each omic then take the mean and # initialize Hj with specific omic (nndsvd) - W_a, H1_0 = _initialize_nmf( - X=self.omic1, - n_components=K, - init="nndsvd", - random_state=self.seed - ) - W_b, H2_0 = _initialize_nmf( - X=self.omic2, - n_components=K, - init="nndsvd", - random_state=self.seed - ) - W0 = (1 / 2) * (W_a + W_b) - del W_a, W_b + W_js = [0] * len(omicstemp) + for j in range(len(omicstemp)): + W_js[j], H0s[j] = _initialize_nmf( + X=omicstemp[j], + n_components=K, + init="nndsvd", + random_state=self.seed + ) + W0 = (1 / len(omicstemp)) * (sum(W_js)) + del W_js else: # 1.2e. Initialize both with all omics, using whatever method # available in _initialize_nmf(). See help. @@ -1032,18 +1022,23 @@ class NMFProfiler: random_state=self.seed ) # 1.2e. 
Split H matrix - H1_0, H2_0 = np.split( + H0s = np.split( ary=H0, indices_or_sections=[self.omic1.shape[1]], axis=1 ) del H0 - # For Beta1 and Beta2: - Beta1_0 = np.repeat(1, K) - Beta2_0 = np.repeat(1, K) - # Put to zero all coefficients linked to component k when k > K_gp - if K_gp < K: - Beta1_0[K_gp:] = 0 - Beta2_0[K_gp:] = 0 + # For Betas: + Beta0s = [0] * len(omicstemp) + for j in range(len(omicstemp)): + Beta0s[j] = np.repeat(1, K) + # Put to zero all coefficients linked to component k when k > K_gp + if K_gp < K: + Beta0s[j][K_gp:] = 0 + + H1_0 = H0s[0] + H2_0 = H0s[1] + Beta1_0 = Beta0s[0] + Beta2_0 = Beta0s[1] return W0, H1_0, H2_0, Beta1_0, Beta2_0 -- GitLab From 3d6067193a0cd474c1b51fdd0d69708a87af3bd4 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Fri, 22 Nov 2024 17:48:26 +0100 Subject: [PATCH 05/19] generalized predict, evolplot, heatmap functions (unchecked) --- nmfprofiler/nmfprofiler.py | 62 ++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 6d57a39..9e1dac3 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -1498,19 +1498,21 @@ class NMFProfiler: ------ :group: list. Predicted group (one of 0 or 1) for the new sample in each omic. - :proj1: array. Projection onto H1 - :proj2: array. Projection onto H2 + :projs: list of arrays. Projection onto each Hj matrix """ - # Convert to right format - new_ind_X1 = new_ind[0][newaxis] - new_ind_X2 = new_ind[1][newaxis] + Hs = [self.H1, self.H2] - # Compute projections of the new samples onto dictionary matrices - proj1 = safe_sparse_dot(new_ind_X1, self.H1.T) - proj2 = safe_sparse_dot(new_ind_X2, self.H2.T) + # Initialize projection list and compute + projs = [] + for j in len(new_ind): + # Convert to right format + new_ind_Xj = new_ind[j][newaxis] + + # Compute projections of the new samples onto dictionary matrices + projs.append(safe_sparse_dot(new_ind_Xj, Hs[j].T)) # For each omic, find which component gave the highest score - group = [proj1.argmax(), proj2.argmax()] + group = [projs[j].argmax() for j in len(new_ind)] # Compute global group value group_value = int(np.average(group)) @@ -1520,7 +1522,7 @@ class NMFProfiler: profile of group {group_value}." ) - res = {"group": group, "proj1": proj1, "proj2": proj2} + res = {"group": group, "projs": projs} return res @@ -1567,14 +1569,18 @@ class NMFProfiler: ------ Return a lineplot. """ + temppalette = ["blue", "orange", "green", "pink", "yellow", "red"] + if obj_to_check == "etas": plt.figure(figsize=(width, height)) - sns.lineplot(data=self.df_etas, palette=["blue", "orange"]) + sns.lineplot(data=self.df_etas, + palette=temppalette[0:self.df_etas.shape[1]]) plt.show() elif obj_to_check == "gradients": plt.figure(figsize=(width, height)) - sns.lineplot(data=self.df_grads, palette=["blue", "orange"]) + sns.lineplot(data=self.df_grads, + palette=temppalette[0:self.df_etas.shape[1]]) plt.show() else: @@ -1584,13 +1590,13 @@ class NMFProfiler: "from NMFProfiler can be plotted with this method." ) - def heatmap(self, obj_to_viz, width, height, path): + def heatmap(self, obj_to_viz, width, height, path, omic_number=None): """Visualize any matrix of X^j, W, H^j with a heatmap. Params ------ - :obj_to_viz: str. One of {`'omic1'`, `'omic2'`, - `'W'`, `'H1'`, `'H2'`}. + :obj_to_viz: str. One of {`'omic'`, `'W'`, `'H'`}. + :omic_number: int. Number from 1 to J (max. number of omics). :width: int. 
Width of the figure (in units by default). :height: int. Height of the figure (in units by default). :path: str. Location to save the figure. @@ -1601,22 +1607,26 @@ class NMFProfiler: """ plt.figure(figsize=(width, height)) - if obj_to_viz == "omic1": - sns.heatmap(pd.DataFrame(self.omic1), cmap="viridis") - elif obj_to_viz == "omic2": - sns.heatmap(pd.DataFrame(self.omic2), cmap="viridis") - elif obj_to_viz == "W": + omics = [self.omic1, self.omic2] + Hs = [self.H1, self.H2] + + if obj_to_viz == "W": sns.heatmap(pd.DataFrame(self.W), cmap="viridis") - elif obj_to_viz == "H1": - sns.heatmap(pd.DataFrame(self.H1), cmap="viridis") - elif obj_to_viz == "H2": - sns.heatmap(pd.DataFrame(self.H2), cmap="viridis") + elif obj_to_viz == "omic": + sns.heatmap(pd.DataFrame(omics[(omic_number-1)]), cmap="viridis") + elif obj_to_viz == "H": + sns.heatmap(pd.DataFrame(Hs[(omic_number-1)]), cmap="viridis") else: raise Exception( "Cannot plot this object, please change 'obj_to_viz' input." - " Only 'omic1', 'omic2', 'W', 'H1' and 'H2' outputs " + " Only 'omic', 'W' and 'H' outputs " "from NMFProfiler can be plotted with this method." ) - plt.savefig(str(path + obj_to_viz + "_Heatmap.png")) + if obj_to_viz == "W": + plotname = str(path + obj_to_viz + "_Heatmap.png") + else: + plotname = str(path + obj_to_viz + omic_number + "_Heatmap.png") + + plt.savefig(plotname) plt.show() -- GitLab From 225d4e1dba447c4d5a1fbab655f0a2e18cb877fe Mon Sep 17 00:00:00 2001 From: cebrouard <celinebrouard@MacBook-Pro-de-cebrouard.local> Date: Mon, 25 Nov 2024 17:01:21 +0100 Subject: [PATCH 06/19] started to modify fit --- nmfprofiler/nmfprofiler.py | 187 ++++++++++++++++++++++++------------- 1 file changed, 122 insertions(+), 65 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 9e1dac3..2c8e032 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -1062,13 +1062,18 @@ class NMFProfiler: # Pre-process datasets (with min-max and number of features) self._preprocess_data() + + omics = [self.omic1, self.omic2] + # Initialize matrices W0, H1_0, H2_0, Beta1_0, Beta2_0 = self._initialize_w_h_beta() + Hs_0 = [H1_0, H2_0] + Betas_0 = [Beta1_0, Beta2_0] self.W_init = W0 - self.H1_init = H1_0 - self.H2_init = H2_0 - self.Beta1_init = Beta1_0 - self.Beta2_init = Beta2_0 + self.H1_init = Hs_0[0] + self.H2_init = Hs_0[1] + self.Beta1_init = Betas_0[0] + self.Beta2_init = Betas_0[1] # Update lambda and gamma given sample size self._update_params() @@ -1078,13 +1083,17 @@ class NMFProfiler: # Create matrices and vectors to update using initialization # (and keep intact initialized matrices and vectors) - W, H1, H2, Beta1, Beta2 = ( - deepcopy(W0), - deepcopy(H1_0), - deepcopy(H2_0), - deepcopy(Beta1_0), - deepcopy(Beta2_0), - ) + #W, H1, H2, Beta1, Beta2 = ( + # deepcopy(W0), + # deepcopy(H1_0), + # deepcopy(H2_0), + # deepcopy(Beta1_0), + # deepcopy(Beta2_0), + #) + + W = deepcopy(W0) + Hs = [deepcopy(Hs_0[j]) for j in range(len(omics))] + Betas = [deepcopy(Betas_0[j]) for j in range(len(omics))] # Create lists of error terms to enrich iteratively ( @@ -1102,6 +1111,13 @@ class NMFProfiler: pred2, nb_iters, ) = ([] for i in range(13)) + + margHs = [margH1, margH2] + gradHs = [gradH1, gradH2] + distorts = [distort1, distort2] + sparsitys = [sparsity1, sparsity2] + preds = [pred1, pred2] + # Create lists of LDA performance indicators to enrich iteratively ( R2adj_1, @@ -1117,16 +1133,22 @@ class NMFProfiler: bet2_1, bet2_2, ) = ([] for i in range(12)) + + R2adjs = [R2adj_1, R2adj_2] 
+ BICs = [BIC_1, BIC_2] + AICs = [AIC_1, AIC_2] + F_pvals = [F_pval_1, F_pval_2] + bets = [[bet1_1,bet1_2],[bet2_1,bet2_2]] loss_init = _computeF( - self.omic1, - self.omic2, + omics[0], + omics[1], Y, W0, - H1_0, - H2_0, - Beta1_0, - Beta2_0, + Hs_0[0], + Hs_0[1], + Betas_0[0], + Betas_0[1], self.params["mu"], self.params["lambda"], self.params["gamma"], @@ -1135,61 +1157,95 @@ class NMFProfiler: ) error.append(loss_init[0]) nb_iters.append(0) - distort1.append(loss_init[1]) - distort2.append(loss_init[2]) - sparsity1.append(loss_init[3]) - sparsity2.append(loss_init[4]) + distorts[0].append(loss_init[1]) + distorts[1].append(loss_init[2]) + sparsitys[0].append(loss_init[3]) + sparsitys[1].append(loss_init[4]) regul.append(loss_init[5]) - pred1.append(loss_init[6]) - pred2.append(loss_init[7]) + preds[0].append(loss_init[6]) + preds[1].append(loss_init[7]) + + # change computeF outputs for more than 2 omics + # Keep track of marginal objective functions in Hj with # initial matrices (necessary for linesearch first execution) - margH1.append( - _computeMargH( - self.omic1, - Y, - W0, - H1_0, - Beta1_0, - self.params["gamma"], - self.params["lambda"], - K_gp, - ) - ) - margH2.append( - _computeMargH( - self.omic2, - Y, - W0, - H2_0, - Beta2_0, - self.params["gamma"], - self.params["lambda"], - K_gp, + for j in range(len(omics)): + margHs[j].append( + _computeMargH( + omics[j], + Y, + W0, + Hs_0[j], + Betas_0[j], + self.params["gamma"], + self.params["lambda"], + K_gp, + ) ) - ) + # LDAs with initial matrices - reg1_init = sm.OLS( - self.y, sm.add_constant(safe_sparse_dot(self.omic1, H1_0.T)) - ).fit() - reg2_init = sm.OLS( - self.y, sm.add_constant(safe_sparse_dot(self.omic2, H2_0.T)) - ).fit() - - bet1_1.append(reg1_init.params[1]) - bet2_1.append(reg1_init.params[2]) - bet1_2.append(reg2_init.params[1]) - bet2_2.append(reg2_init.params[2]) - R2adj_1.append(reg1_init.rsquared_adj) - R2adj_2.append(reg2_init.rsquared_adj) - BIC_1.append(reg1_init.bic) - BIC_2.append(reg2_init.bic) - AIC_1.append(reg1_init.aic) - AIC_2.append(reg2_init.aic) - F_pval_1.append(reg1_init.f_pvalue) - F_pval_2.append(reg2_init.f_pvalue) + regs_init = [] + for j in range(len(omics)): + + regs_init[j] = sm.OLS( + self.y, sm.add_constant(safe_sparse_dot(omics[j], Hs_0[j].T)) + ).fit() + + for j in range(len(omics)): + bets[j][0].append(regs_init[j].params[1]) + bets[j][1].append(regs_init[j].params[2]) + R2adjs[j].append(regs_init[j].rsquared_adj) + BICs[j].append(regs_init[j].bic) + AICs[j].append(regs_init[j].aic) + F_pvals[j].append(regs_init[j].f_pvalue) + + #bet1_1.append(reg1_init.params[1]) + #bet2_1.append(reg1_init.params[2]) + #bet1_2.append(reg2_init.params[1]) + #bet2_2.append(reg2_init.params[2]) + #R2adj_1.append(reg1_init.rsquared_adj) + #R2adj_2.append(reg2_init.rsquared_adj) + #BIC_1.append(reg1_init.bic) + #BIC_2.append(reg2_init.bic) + #AIC_1.append(reg1_init.aic) + #AIC_2.append(reg2_init.aic) + #F_pval_1.append(reg1_init.f_pvalue) + #F_pval_2.append(reg2_init.f_pvalue) + + H1 = Hs[0] + H2 = Hs[1] + Beta1 = Betas[0] + Beta2 = Betas[1] + H1_0 = Hs_0[0] + H2_0 = Hs_0[1] + Beta1_0 = Betas_0[0] + Beta2_0 = Betas_0[1] + margH1 = margHs[0] + margH2 = margHs[1] + gradH1 = gradHs[0] + gradH2 = gradHs[1] + distort1 = distorts[0] + distort2 = distorts[1] + sparsity1 = sparsitys[0] + sparsity2 = sparsitys[1] + pred1 = preds[0] + pred2 = preds[1] + R2adj_1 = R2adjs[0] + R2adj_2 = R2adjs[1] + BIC_1 = BICS[0] + BIC_2 = BICS[1] + AIC_1 = AICs[0] + AIC_2 = AICs[1] + F_pval_1 = F_pvals[0] + F_pval_2 = F_pvals[1] 
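        # NB: the rebindings above and below rely on list aliasing:
        # R2adjs[0] and R2adj_1 are the same list object, so appends made
        # through R2adjs (or bets[j][k]) during the iterations stay
        # visible through the old per-omic names.
        # NB: `BICS[0]` / `BICS[1]` above is a capitalization slip for
        # `BICs` and would raise NameError at runtime; this interim
        # unpacking block is removed again when fit() is fully
        # generalized in a later patch of this series.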
+ bet1_1 = bets[0][0] + bet1_2 = bets[0][1] + bet2_1 = bets[1][0] + bet2_2 = bets[1][1] + + # Show the initial global loss value if self.verbose: @@ -1199,6 +1255,7 @@ class NMFProfiler: eta1, eta2 = ([] for i in range(2)) eta1.append(self.params["eta1"]) eta2.append(self.params["eta2"]) + etas = [eta1,eta2] # Begin the optimization,k start_time = process_time() -- GitLab From 840c64322849193830d00807974fb5fb420ab027 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Tue, 26 Nov 2024 15:29:19 +0100 Subject: [PATCH 07/19] generalized _check_inputs_NMFProfiler function --- nmfprofiler/utils/validate_inputs.py | 57 +++++++++++++--------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/nmfprofiler/utils/validate_inputs.py b/nmfprofiler/utils/validate_inputs.py index b3d7ec6..e817e12 100644 --- a/nmfprofiler/utils/validate_inputs.py +++ b/nmfprofiler/utils/validate_inputs.py @@ -43,34 +43,31 @@ def _check_inputs_NMFProfiler(X): 'nndsvdar' ] - if _check_type(X.omic1, np.ndarray) is False: - raise TypeError( - f"Received {type(X.omic1)}, " - "'omic1' should be a numpy.ndarray." - ) - - if _check_type(X.omic2, np.ndarray) is False: - raise TypeError( - f"Received {type(X.omic2)}, " - "'omic2' should be a numpy.ndarray." - ) - if X.omic1.shape[0] != X.omic2.shape[0]: # check dimensions - raise Exception( - "Both datasets should have the exact same samples, but " - f"omic1 has {X.omic1.shape[0]} samples " - f"and omic2 has {X.omic2.shape[0]} samples. " - "Please check datasets." - ) + omics = [X.omic1, X.omic2] + for j in range(len(omics)): + if _check_type(omics[j], np.ndarray) is False: + raise TypeError( + f"Received {type(omics[j])}, " + f"item number {j+1} in 'omics' (i.e. omic{j+1}) " + "should be a numpy.ndarray." + ) + if omics[0].shape[0] != omics[j].shape[0]: # check dimensions + raise Exception( + "All datasets should have the exact same samples, but " + f"omic1 (item 1 in 'omics') has {omics[0].shape[0]} samples " + f"and omic{j+1} (item {j+1} in 'omics') has " + f"{omics[j].shape[0]} samples. Please check datasets." + ) if _check_type(X.y, np.ndarray) is False: raise TypeError( f"Received {type(X.y)}, " "'y' should be a 1D numpy.ndarray." ) - if X.y.shape[0] != X.omic1.shape[0]: # check dimensions + if X.y.shape[0] != omics[0].shape[0]: # check dimensions raise Exception( f"Group vector y has {X.y.shape[0]} samples but " - f"omic1 has {X.omic1.shape[0]} samples. " + f"omic1 (item 1 in 'omics') has {omics[0].shape[0]} samples. " "Please check datasets." ) if len(np.unique(X.y)) < 2: # check y values @@ -98,16 +95,16 @@ def _check_inputs_NMFProfiler(X): f"Received {X.params['alpha']} {type(X.params['alpha'])}, " "'alpha' should be a positive integer." ) - if _check_type(X.params['eta1'], float) is False or X.params['eta1'] <= 0: - raise TypeError( - f"Received {X.params['eta1']} {type(X.params['eta1'])}, " - "'eta1' should be a positive float." - ) - if _check_type(X.params['eta2'], float) is False or X.params['eta2'] <= 0: - raise TypeError( - f"Received {X.params['eta2']} {type(X.params['eta2'])}, " - "'eta2' should be a positive float." - ) + + etas = [X.params['eta1'], X.params['eta2']] + for j in range(len(etas)): + if _check_type(etas[j], float) is False or etas[j] <= 0: + raise TypeError( + f"Received {etas[j]} {type(etas[j])}, " + f"eta {j+1} (item {j+1} in 'params['etas']') " + "should be a positive float." 
+ ) + if _check_type(X.params['gamma'], float) is False or X.params['gamma'] < 0: raise TypeError( f"Received {X.params['gamma']} {type(X.params['gamma'])}, " -- GitLab From 152a3a28d11bf622d6fc8d82f9026ddc5704fd91 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Wed, 27 Nov 2024 10:50:24 +0100 Subject: [PATCH 08/19] generalized fit function (1st step) --- nmfprofiler/nmfprofiler.py | 370 ++++++++++++++----------------------- 1 file changed, 142 insertions(+), 228 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 2c8e032..336b9aa 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -1062,18 +1062,19 @@ class NMFProfiler: # Pre-process datasets (with min-max and number of features) self._preprocess_data() - - omics = [self.omic1, self.omic2] - + omics = [self.omic1, self.omic2] # *** + # Initialize matrices - W0, H1_0, H2_0, Beta1_0, Beta2_0 = self._initialize_w_h_beta() - Hs_0 = [H1_0, H2_0] - Betas_0 = [Beta1_0, Beta2_0] + W0, H1_0, H2_0, Beta1_0, Beta2_0 = self._initialize_w_h_beta() # *** + + Hs_0 = [H1_0, H2_0] # *** + Betas_0 = [Beta1_0, Beta2_0] # *** + self.W_init = W0 - self.H1_init = Hs_0[0] - self.H2_init = Hs_0[1] - self.Beta1_init = Betas_0[0] - self.Beta2_init = Betas_0[1] + self.H1_init = Hs_0[0] # *** + self.H2_init = Hs_0[1] # *** + self.Beta1_init = Betas_0[0] # *** + self.Beta2_init = Betas_0[1] # *** # Update lambda and gamma given sample size self._update_params() @@ -1083,20 +1084,12 @@ class NMFProfiler: # Create matrices and vectors to update using initialization # (and keep intact initialized matrices and vectors) - #W, H1, H2, Beta1, Beta2 = ( - # deepcopy(W0), - # deepcopy(H1_0), - # deepcopy(H2_0), - # deepcopy(Beta1_0), - # deepcopy(Beta2_0), - #) - W = deepcopy(W0) Hs = [deepcopy(Hs_0[j]) for j in range(len(omics))] Betas = [deepcopy(Betas_0[j]) for j in range(len(omics))] # Create lists of error terms to enrich iteratively - ( + ( # *** error, margH1, margH2, @@ -1111,15 +1104,15 @@ class NMFProfiler: pred2, nb_iters, ) = ([] for i in range(13)) - - margHs = [margH1, margH2] - gradHs = [gradH1, gradH2] - distorts = [distort1, distort2] - sparsitys = [sparsity1, sparsity2] - preds = [pred1, pred2] - + + margHs = [margH1, margH2] # *** + gradHs = [gradH1, gradH2] # *** + distorts = [distort1, distort2] # *** + sparsitys = [sparsity1, sparsity2] # *** + preds = [pred1, pred2] # *** + # Create lists of LDA performance indicators to enrich iteratively - ( + ( # *** R2adj_1, R2adj_2, BIC_1, @@ -1133,14 +1126,14 @@ class NMFProfiler: bet2_1, bet2_2, ) = ([] for i in range(12)) - - R2adjs = [R2adj_1, R2adj_2] - BICs = [BIC_1, BIC_2] - AICs = [AIC_1, AIC_2] - F_pvals = [F_pval_1, F_pval_2] - bets = [[bet1_1,bet1_2],[bet2_1,bet2_2]] - - loss_init = _computeF( + + R2adjs = [R2adj_1, R2adj_2] # *** + BICs = [BIC_1, BIC_2] # *** + AICs = [AIC_1, AIC_2] # *** + F_pvals = [F_pval_1, F_pval_2] # *** + bets = [[bet1_1, bet1_2], [bet2_1, bet2_2]] # *** + + loss_init = _computeF( # *** omics[0], omics[1], Y, @@ -1157,16 +1150,13 @@ class NMFProfiler: ) error.append(loss_init[0]) nb_iters.append(0) - distorts[0].append(loss_init[1]) - distorts[1].append(loss_init[2]) - sparsitys[0].append(loss_init[3]) - sparsitys[1].append(loss_init[4]) - regul.append(loss_init[5]) - preds[0].append(loss_init[6]) - preds[1].append(loss_init[7]) - - # change computeF outputs for more than 2 omics - + distorts[0].append(loss_init[1]) # *** + distorts[1].append(loss_init[2]) # *** + 
sparsitys[0].append(loss_init[3]) # *** + sparsitys[1].append(loss_init[4]) # *** + regul.append(loss_init[5]) # *** + preds[0].append(loss_init[6]) # *** + preds[1].append(loss_init[7]) # *** # Keep track of marginal objective functions in Hj with # initial matrices (necessary for linesearch first execution) @@ -1184,80 +1174,32 @@ class NMFProfiler: ) ) - # LDAs with initial matrices - regs_init = [] - for j in range(len(omics)): - + regs_init = [0] * len(omics) # *** + for j in range(len(omics)): # *** regs_init[j] = sm.OLS( self.y, sm.add_constant(safe_sparse_dot(omics[j], Hs_0[j].T)) ).fit() - for j in range(len(omics)): + for j in range(len(omics)): # *** bets[j][0].append(regs_init[j].params[1]) bets[j][1].append(regs_init[j].params[2]) R2adjs[j].append(regs_init[j].rsquared_adj) BICs[j].append(regs_init[j].bic) AICs[j].append(regs_init[j].aic) F_pvals[j].append(regs_init[j].f_pvalue) - - #bet1_1.append(reg1_init.params[1]) - #bet2_1.append(reg1_init.params[2]) - #bet1_2.append(reg2_init.params[1]) - #bet2_2.append(reg2_init.params[2]) - #R2adj_1.append(reg1_init.rsquared_adj) - #R2adj_2.append(reg2_init.rsquared_adj) - #BIC_1.append(reg1_init.bic) - #BIC_2.append(reg2_init.bic) - #AIC_1.append(reg1_init.aic) - #AIC_2.append(reg2_init.aic) - #F_pval_1.append(reg1_init.f_pvalue) - #F_pval_2.append(reg2_init.f_pvalue) - - H1 = Hs[0] - H2 = Hs[1] - Beta1 = Betas[0] - Beta2 = Betas[1] - H1_0 = Hs_0[0] - H2_0 = Hs_0[1] - Beta1_0 = Betas_0[0] - Beta2_0 = Betas_0[1] - margH1 = margHs[0] - margH2 = margHs[1] - gradH1 = gradHs[0] - gradH2 = gradHs[1] - distort1 = distorts[0] - distort2 = distorts[1] - sparsity1 = sparsitys[0] - sparsity2 = sparsitys[1] - pred1 = preds[0] - pred2 = preds[1] - R2adj_1 = R2adjs[0] - R2adj_2 = R2adjs[1] - BIC_1 = BICS[0] - BIC_2 = BICS[1] - AIC_1 = AICs[0] - AIC_2 = AICs[1] - F_pval_1 = F_pvals[0] - F_pval_2 = F_pvals[1] - bet1_1 = bets[0][0] - bet1_2 = bets[0][1] - bet2_1 = bets[1][0] - bet2_2 = bets[1][1] - - # Show the initial global loss value if self.verbose: print("Error after initialization step: %f" % (error[0])) # To keep track of the choice of etas - eta1, eta2 = ([] for i in range(2)) - eta1.append(self.params["eta1"]) - eta2.append(self.params["eta2"]) - etas = [eta1,eta2] + etas_init = [self.params["eta1"], self.params["eta2"]] # *** + etas = [[0] for _ in range(len(omics))] # *** + for j in range(len(etas)): + etas[j].append(etas_init[j]) # CHECK HERE ! - # Begin the optimization,k + # Begin the optimization start_time = process_time() for n_iter in range(1, self.max_iter + 1): @@ -1265,35 +1207,35 @@ class NMFProfiler: # Analytical solver... if self.solver == "analytical": - ( + ( # *** W, - H1, - H2, - Beta1, - Beta2, + Hs[0], + Hs[1], + Betas[0], + Betas[1], self.params["eta1"], self.params["eta2"], - margH1, - margH2, - gradH1, - gradH2, + margHs[0], + margHs[1], + gradHs[0], + gradHs[1], ) = _analytic_solver( - self.omic1, - self.omic2, + omics[0], + omics[1], Y, W, - H1, - H2, - Beta1, - Beta2, + Hs[0], + Hs[1], + Betas[0], + Betas[1], self.params, as_sklearn=self.as_sklearn, backtrack=self.backtrack, max_iter_back=self.max_iter_back, - H1_loss=margH1, - H2_loss=margH2, - H1_grad=gradH1, - H2_grad=gradH2, + H1_loss=margHs[0], + H2_loss=margHs[1], + H1_grad=gradHs[0], + H2_grad=gradHs[1], K=K, K_gp=K_gp, verbose=self.verbose, @@ -1301,22 +1243,23 @@ class NMFProfiler: # ... 
or Autograd solver else: - W, H1, H2, Beta1, Beta2 = _autograd_solver() + W, Hs[0], Hs[1], Betas[0], Betas[1] = _autograd_solver() # *** # Keep track of optimal etas - eta1.append(self.params["eta1"]) - eta2.append(self.params["eta2"]) + etas_current = [self.params["eta1"], self.params["eta1"]] # *** + for j in range(len(etas_current)): # *** + etas[j].append(etas_current[j]) # *** # Compute the new loss as well as specific terms - loss_ = _computeF( - self.omic1, - self.omic2, + loss_ = _computeF( # *** + omics[0], + omics[1], Y, W, - H1, - H2, - Beta1, - Beta2, + Hs[0], + Hs[1], + Betas[0], + Betas[1], self.params["mu"], self.params["lambda"], self.params["gamma"], @@ -1325,33 +1268,28 @@ class NMFProfiler: ) error.append(loss_[0]) nb_iters.append(n_iter) - distort1.append(loss_[1]) - distort2.append(loss_[2]) - sparsity1.append(loss_[3]) - sparsity2.append(loss_[4]) - regul.append(loss_[5]) - pred1.append(loss_[6]) - pred2.append(loss_[7]) + distorts[0].append(loss_[1]) # *** + distorts[1].append(loss_[2]) # *** + sparsitys[0].append(loss_[3]) # *** + sparsitys[1].append(loss_[4]) # *** + regul.append(loss_[5]) # *** + preds[0].append(loss_[6]) # *** + preds[1].append(loss_[7]) # *** # Monitor the LDA part - reg1 = sm.OLS( - self.y, sm.add_constant(safe_sparse_dot(self.omic1, H1.T)) - ).fit() - reg2 = sm.OLS( - self.y, sm.add_constant(safe_sparse_dot(self.omic2, H2.T)) - ).fit() - bet1_1.append(reg1.params[1]) - bet2_1.append(reg1.params[2]) - bet1_2.append(reg2.params[1]) - bet2_2.append(reg2.params[2]) - R2adj_1.append(reg1.rsquared_adj) - R2adj_2.append(reg2.rsquared_adj) - BIC_1.append(reg1.bic) - BIC_2.append(reg2.bic) - AIC_1.append(reg1.aic) - AIC_2.append(reg2.aic) - F_pval_1.append(reg1.f_pvalue) - F_pval_2.append(reg2.f_pvalue) + regs = [0] * len(omics) # *** + for j in range(len(omics)): # *** + regs[j] = sm.OLS( + self.y, sm.add_constant(safe_sparse_dot(omics[j], Hs[j].T)) + ).fit() + + for j in range(len(omics)): + bets[j][0].append(regs[j].params[1]) # *** + bets[j][1].append(regs[j].params[2]) # *** + R2adjs[j].append(regs[j].rsquared_adj) + BICs[j].append(regs[j].bic) + AICs[j].append(regs[j].aic) + F_pvals[j].append(regs[j].f_pvalue) # Every 10 iterations, if tol is still strictly positive and # verbose == True, compute the loss value @@ -1396,92 +1334,68 @@ class NMFProfiler: # -------------------------------- self.n_iter = n_iter self.W = W - self.H1 = H1 - self.H2 = H2 - self.Beta1 = Beta1 - self.Beta2 = Beta2 + self.H1 = Hs[0] # *** + self.H2 = Hs[1] # *** + self.Beta1 = Betas[0] # *** + self.Beta2 = Betas[1] # *** # Keep track of final Hj gradients and build the Hj gradients matrix # (following up their evolution during optimization) # ------------------------------------------------------------------ - gradH1.append( - multi_dot([W.T, W, H1]) - + ( - self.params["gamma"] - * multi_dot( - [ - np.transpose(Beta1[newaxis]), - Beta1[newaxis], - H1, - self.omic1.T, - self.omic1, - ] - ) - ) - - ( - safe_sparse_dot(W.T, self.omic1) + for j in range(len(omics)): # *** + gradHs[j].append( + multi_dot([W.T, W, Hs[j]]) + ( self.params["gamma"] * multi_dot( [ - np.transpose(Beta1[newaxis]), - self.y[newaxis], - self.omic1 + np.transpose(Betas[j][newaxis]), + Betas[j][newaxis], + Hs[j], + omics[j].T, + omics[j], ] ) ) - ) - ) - gradH2.append( - multi_dot([W.T, W, H2]) - + ( - self.params["gamma"] - * multi_dot( - [ - np.transpose(Beta2[newaxis]), - Beta2[newaxis], - H2, - self.omic2.T, - self.omic2, - ] - ) - ) - - ( - safe_sparse_dot(W.T, self.omic2) - + ( - 
self.params["gamma"] - * multi_dot( - [np.transpose(Beta2[newaxis]), - self.y[newaxis], self.omic2] + - ( + safe_sparse_dot(W.T, omics[j]) + + ( + self.params["gamma"] + * multi_dot( + [ + np.transpose(Betas[j][newaxis]), + self.y[newaxis], + omics[j] + ] + ) ) ) ) - ) grads = np.hstack( ( - np.vstack([norm(i) ** 2 for i in gradH1]), - np.vstack([norm(i) ** 2 for i in gradH2]), + np.vstack([norm(i) ** 2 for i in gradHs[0]]), # *** + np.vstack([norm(i) ** 2 for i in gradHs[1]]), # *** ) ) self.df_grads = pd.DataFrame(grads, columns=["grad_H1", "grad_H2"]) # Build the error terms matrix # ---------------------------- - error_terms = np.hstack( + error_terms = np.hstack( # *** ( np.vstack(nb_iters), - np.vstack(distort1), - np.vstack(distort2), - np.vstack(sparsity1), - np.vstack(sparsity2), + np.vstack(distorts[0]), + np.vstack(distorts[1]), + np.vstack(sparsitys[0]), + np.vstack(sparsitys[1]), np.vstack(regul), - np.vstack(pred1), - np.vstack(pred2), + np.vstack(preds[0]), + np.vstack(preds[1]), np.vstack(error), ) ) - self.df_errors = pd.DataFrame( + self.df_errors = pd.DataFrame( # *** error_terms, columns=[ "iter", @@ -1498,24 +1412,24 @@ class NMFProfiler: # Build the LDA performance matrix # -------------------------------- - LDA_perf = np.hstack( + LDA_perf = np.hstack( # *** ( np.vstack(nb_iters), - np.vstack(bet1_1), - np.vstack(bet1_2), - np.vstack(R2adj_1), - np.vstack(BIC_1), - np.vstack(AIC_1), - np.vstack(F_pval_1), - np.vstack(bet1_2), - np.vstack(bet2_2), - np.vstack(R2adj_2), - np.vstack(BIC_2), - np.vstack(AIC_2), - np.vstack(F_pval_2), + np.vstack(bets[0][0]), + np.vstack(bets[0][1]), + np.vstack(R2adjs[0]), + np.vstack(BICs[0]), + np.vstack(AICs[0]), + np.vstack(F_pvals[0]), + np.vstack(bets[1][0]), + np.vstack(bets[1][1]), + np.vstack(R2adjs[1]), + np.vstack(BICs[1]), + np.vstack(AICs[1]), + np.vstack(F_pvals[1]), ) ) - self.df_ldaperf = pd.DataFrame( + self.df_ldaperf = pd.DataFrame( # *** LDA_perf, columns=[ "iter", @@ -1537,8 +1451,8 @@ class NMFProfiler: # Build the etas matrix # (following up etas evolution during optimization) # ------------------------------------------------- - etas = np.hstack((np.vstack(eta1), np.vstack(eta2))) - self.df_etas = pd.DataFrame(etas, columns=["eta_omic1", "eta_omic2"]) + Etas = np.hstack((np.vstack(etas[0]), np.vstack(etas[1]))) # *** + self.df_etas = pd.DataFrame(Etas, columns=["eta_omic1", "eta_omic2"]) return self -- GitLab From 4f9f80b7eb0287762d34a5f1dd321f1e91043719 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Wed, 27 Nov 2024 12:30:01 +0100 Subject: [PATCH 09/19] generalized fit function (2nd step) --- nmfprofiler/nmfprofiler.py | 151 ++++++++++++++++--------------------- 1 file changed, 67 insertions(+), 84 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 336b9aa..baf3c4a 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -11,6 +11,8 @@ groups from copy import deepcopy +import itertools + import warnings import matplotlib.pyplot as plt import seaborn as sns @@ -1089,49 +1091,23 @@ class NMFProfiler: Betas = [deepcopy(Betas_0[j]) for j in range(len(omics))] # Create lists of error terms to enrich iteratively - ( # *** - error, - margH1, - margH2, - gradH1, - gradH2, - distort1, - distort2, - sparsity1, - sparsity2, - regul, - pred1, - pred2, - nb_iters, - ) = ([] for i in range(13)) - - margHs = [margH1, margH2] # *** - gradHs = [gradH1, gradH2] # *** - distorts = [distort1, distort2] # *** - sparsitys = [sparsity1, 
sparsity2] # *** - preds = [pred1, pred2] # *** + (error, regul, nb_iters) = ([] for i in range(3)) + ( + margHs, + gradHs, + distorts, + sparsitys, + preds, + ) = ([[] for j in range(len(omics))] for _ in range(5)) # Create lists of LDA performance indicators to enrich iteratively - ( # *** - R2adj_1, - R2adj_2, - BIC_1, - BIC_2, - AIC_1, - AIC_2, - F_pval_1, - F_pval_2, - bet1_1, - bet1_2, - bet2_1, - bet2_2, - ) = ([] for i in range(12)) - - R2adjs = [R2adj_1, R2adj_2] # *** - BICs = [BIC_1, BIC_2] # *** - AICs = [AIC_1, AIC_2] # *** - F_pvals = [F_pval_1, F_pval_2] # *** - bets = [[bet1_1, bet1_2], [bet2_1, bet2_2]] # *** + ( + R2adjs, + BICs, + AICs, + F_pvals, + ) = ([[] for j in range(len(omics))] for _ in range(4)) + bets = [[[] for k in range(K)] for j in range(len(omics))] loss_init = _computeF( # *** omics[0], @@ -1175,15 +1151,15 @@ class NMFProfiler: ) # LDAs with initial matrices - regs_init = [0] * len(omics) # *** + regs_init = [0 for j in range(len(omics))] # *** for j in range(len(omics)): # *** regs_init[j] = sm.OLS( self.y, sm.add_constant(safe_sparse_dot(omics[j], Hs_0[j].T)) ).fit() for j in range(len(omics)): # *** - bets[j][0].append(regs_init[j].params[1]) - bets[j][1].append(regs_init[j].params[2]) + for k in range(K): + bets[j][k].append(regs_init[j].params[k+1]) R2adjs[j].append(regs_init[j].rsquared_adj) BICs[j].append(regs_init[j].bic) AICs[j].append(regs_init[j].aic) @@ -1195,9 +1171,9 @@ class NMFProfiler: # To keep track of the choice of etas etas_init = [self.params["eta1"], self.params["eta2"]] # *** - etas = [[0] for _ in range(len(omics))] # *** - for j in range(len(etas)): - etas[j].append(etas_init[j]) # CHECK HERE ! + etas = [[] for j in range(len(omics))] # *** + for j in range(len(omics)): + etas[j].append(etas_init[j]) # Begin the optimization start_time = process_time() @@ -1246,7 +1222,7 @@ class NMFProfiler: W, Hs[0], Hs[1], Betas[0], Betas[1] = _autograd_solver() # *** # Keep track of optimal etas - etas_current = [self.params["eta1"], self.params["eta1"]] # *** + etas_current = [self.params["eta1"], self.params["eta2"]] # *** for j in range(len(etas_current)): # *** etas[j].append(etas_current[j]) # *** @@ -1277,15 +1253,15 @@ class NMFProfiler: preds[1].append(loss_[7]) # *** # Monitor the LDA part - regs = [0] * len(omics) # *** + regs = [0 for j in range(len(omics))] # *** for j in range(len(omics)): # *** regs[j] = sm.OLS( self.y, sm.add_constant(safe_sparse_dot(omics[j], Hs[j].T)) ).fit() for j in range(len(omics)): - bets[j][0].append(regs[j].params[1]) # *** - bets[j][1].append(regs[j].params[2]) # *** + for k in range(K): + bets[j][k].append(regs[j].params[k+1]) R2adjs[j].append(regs[j].rsquared_adj) BICs[j].append(regs[j].bic) AICs[j].append(regs[j].aic) @@ -1373,12 +1349,16 @@ class NMFProfiler: ) grads = np.hstack( - ( - np.vstack([norm(i) ** 2 for i in gradHs[0]]), # *** - np.vstack([norm(i) ** 2 for i in gradHs[1]]), # *** - ) + ([ + np.vstack( + [norm(i) ** 2 for i in gradHs[j]] + ) for j in range(len(omics)) # *** + ]) + ) + self.df_grads = pd.DataFrame( + grads, + columns=[f'grad_H{j+1}' for j in range(len(omics))] # *** ) - self.df_grads = pd.DataFrame(grads, columns=["grad_H1", "grad_H2"]) # Build the error terms matrix # ---------------------------- @@ -1395,19 +1375,26 @@ class NMFProfiler: np.vstack(error), ) ) - self.df_errors = pd.DataFrame( # *** + # error_terms = np.hstack( # *** + # ( + # np.vstack(nb_iters), + # (np.vstack(distorts[j]) for j in range(len(omics))), + # (np.vstack(sparsitys[j]) for j in 
range(len(omics))), + # np.vstack(regul), + # (np.vstack(preds[j]) for j in range(len(omics))), + # np.vstack(error), + # ) + # ) + self.df_errors = pd.DataFrame( error_terms, - columns=[ - "iter", - "distort1", - "distort2", - "sparsity1", - "sparsity2", - "regul", - "pred1", - "pred2", - "loss", - ], + columns=list(itertools.chain.from_iterable([ + ["iter"], + [f"distort{j+1}" for j in range(len(omics))], # *** + [f"sparsity{j+1}" for j in range(len(omics))], # *** + ["regul"], + [f"pred{j+1}" for j in range(len(omics))], # *** + ["loss"], + ])), ) # Build the LDA performance matrix @@ -1429,23 +1416,19 @@ class NMFProfiler: np.vstack(F_pvals[1]), ) ) - self.df_ldaperf = pd.DataFrame( # *** + self.df_ldaperf = pd.DataFrame( LDA_perf, - columns=[ - "iter", - "Comp.1 coef (omic1)", - "Comp.2 coef (omic1)", - "R2 Adjusted (omic1)", - "BIC (omic1)", - "AIC (omic1)", - "F-statistic p-value (omic1)", - "Comp.1 coef (omic2)", - "Comp.2 coef (omic2)", - "R2 Adjusted (omic2)", - "BIC (omic2)", - "AIC (omic2)", - "F-statistic p-value (omic2)", - ], + columns=list(itertools.chain.from_iterable([ + ["iter"], + list(itertools.chain.from_iterable([[ + f"Comp.1 coef (omic{j+1})", + f"Comp.2 coef (omic{j+1})", + f"R2 Adjusted (omic{j+1})", + f"BIC (omic{j+1})", + f"AIC (omic{j+1})", + f"F-statistic p-value (omic{j+1})" + ] for j in range(len(omics))])), # *** + ])), ) # Build the etas matrix -- GitLab From 83832831a8e29b34afc608fbd49fbf61e780e4e1 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Wed, 27 Nov 2024 14:32:03 +0100 Subject: [PATCH 10/19] generalized fit function (last step) --- nmfprofiler/nmfprofiler.py | 94 +++++++++++++++----------------------- 1 file changed, 38 insertions(+), 56 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index baf3c4a..5f3f6d6 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -1050,14 +1050,13 @@ class NMFProfiler: _check_inputs_NMFProfiler(self) # Extract K, the number of latent components, as equal - # to the number of distinct groups in y + # to the number of distinct groups, U, in y K = len(np.unique(self.y)) # For now, initialize K_gp, the number of latent components # dedicated to groups, identically to K K_gp = len(np.unique(self.y)) - # Automatically convert a vector encoding U groups into - # a one-hot encoded matrix + # Automatically convert y into a one-hot encoded matrix Y encoder = OneHotEncoder(handle_unknown="ignore") Y = encoder.fit_transform(self.y.reshape(-1, 1)).toarray() @@ -1087,8 +1086,8 @@ class NMFProfiler: # Create matrices and vectors to update using initialization # (and keep intact initialized matrices and vectors) W = deepcopy(W0) - Hs = [deepcopy(Hs_0[j]) for j in range(len(omics))] - Betas = [deepcopy(Betas_0[j]) for j in range(len(omics))] + Hs = [deepcopy(Hs_0[j]) for j in range(len(omics))] # *** + Betas = [deepcopy(Betas_0[j]) for j in range(len(omics))] # *** # Create lists of error terms to enrich iteratively (error, regul, nb_iters) = ([] for i in range(3)) @@ -1098,7 +1097,7 @@ class NMFProfiler: distorts, sparsitys, preds, - ) = ([[] for j in range(len(omics))] for _ in range(5)) + ) = ([[] for j in range(len(omics))] for _ in range(5)) # *** # Create lists of LDA performance indicators to enrich iteratively ( @@ -1106,8 +1105,8 @@ class NMFProfiler: BICs, AICs, F_pvals, - ) = ([[] for j in range(len(omics))] for _ in range(4)) - bets = [[[] for k in range(K)] for j in range(len(omics))] + ) = ([[] for j in range(len(omics))] for _ in 
range(4)) # *** + bets = [[[] for k in range(K)] for j in range(len(omics))] # *** loss_init = _computeF( # *** omics[0], @@ -1136,7 +1135,7 @@ class NMFProfiler: # Keep track of marginal objective functions in Hj with # initial matrices (necessary for linesearch first execution) - for j in range(len(omics)): + for j in range(len(omics)): # *** margHs[j].append( _computeMargH( omics[j], @@ -1172,7 +1171,7 @@ class NMFProfiler: # To keep track of the choice of etas etas_init = [self.params["eta1"], self.params["eta2"]] # *** etas = [[] for j in range(len(omics))] # *** - for j in range(len(omics)): + for j in range(len(omics)): # *** etas[j].append(etas_init[j]) # Begin the optimization @@ -1348,13 +1347,11 @@ class NMFProfiler: ) ) - grads = np.hstack( - ([ - np.vstack( - [norm(i) ** 2 for i in gradHs[j]] - ) for j in range(len(omics)) # *** - ]) - ) + grads = np.array( # *** + [ + [norm(i) ** 2 for i in gradHs[j]] for j in range(len(omics)) + ] + ).T self.df_grads = pd.DataFrame( grads, columns=[f'grad_H{j+1}' for j in range(len(omics))] # *** @@ -1362,29 +1359,16 @@ class NMFProfiler: # Build the error terms matrix # ---------------------------- - error_terms = np.hstack( # *** - ( + error_terms = np.hstack( + [ np.vstack(nb_iters), - np.vstack(distorts[0]), - np.vstack(distorts[1]), - np.vstack(sparsitys[0]), - np.vstack(sparsitys[1]), + np.array(distorts).T, + np.array(sparsitys).T, np.vstack(regul), - np.vstack(preds[0]), - np.vstack(preds[1]), + np.array(preds).T, np.vstack(error), - ) + ] ) - # error_terms = np.hstack( # *** - # ( - # np.vstack(nb_iters), - # (np.vstack(distorts[j]) for j in range(len(omics))), - # (np.vstack(sparsitys[j]) for j in range(len(omics))), - # np.vstack(regul), - # (np.vstack(preds[j]) for j in range(len(omics))), - # np.vstack(error), - # ) - # ) self.df_errors = pd.DataFrame( error_terms, columns=list(itertools.chain.from_iterable([ @@ -1399,30 +1383,26 @@ class NMFProfiler: # Build the LDA performance matrix # -------------------------------- - LDA_perf = np.hstack( # *** - ( - np.vstack(nb_iters), - np.vstack(bets[0][0]), - np.vstack(bets[0][1]), - np.vstack(R2adjs[0]), - np.vstack(BICs[0]), - np.vstack(AICs[0]), - np.vstack(F_pvals[0]), - np.vstack(bets[1][0]), - np.vstack(bets[1][1]), - np.vstack(R2adjs[1]), - np.vstack(BICs[1]), - np.vstack(AICs[1]), - np.vstack(F_pvals[1]), + LDA_perf = np.vstack(nb_iters) + for j in range(len(omics)): + bets_j = np.array(bets[j]).T + perf_j = np.array( + [ + R2adjs[j], + BICs[j], + AICs[j], + F_pvals[j], + ] + ).T + LDA_perf = np.hstack( + [LDA_perf, bets_j, perf_j] ) - ) self.df_ldaperf = pd.DataFrame( LDA_perf, columns=list(itertools.chain.from_iterable([ ["iter"], list(itertools.chain.from_iterable([[ - f"Comp.1 coef (omic{j+1})", - f"Comp.2 coef (omic{j+1})", + *[f"Comp.{k+1} coef (omic{j+1})" for k in range(K)], f"R2 Adjusted (omic{j+1})", f"BIC (omic{j+1})", f"AIC (omic{j+1})", @@ -1434,8 +1414,10 @@ class NMFProfiler: # Build the etas matrix # (following up etas evolution during optimization) # ------------------------------------------------- - Etas = np.hstack((np.vstack(etas[0]), np.vstack(etas[1]))) # *** - self.df_etas = pd.DataFrame(Etas, columns=["eta_omic1", "eta_omic2"]) + self.df_etas = pd.DataFrame( + np.array(etas).T, + columns=[f"eta_omic{j+1}" for j in range(len(omics))] # *** + ) return self -- GitLab From a071ad6c6cb9144cd0d4863d4dfeb5934e3699f7 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Wed, 27 Nov 2024 15:43:43 +0100 Subject: [PATCH 11/19] extended 
generalization + checked plot methods --- nmfprofiler/nmfprofiler.py | 185 +++++++++++++++++++------------------ 1 file changed, 95 insertions(+), 90 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 5f3f6d6..2edeff6 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -39,7 +39,7 @@ def _prox_l1_pos(v, lambd): return np.maximum(v - lambd, 0) -def _update_W(W, omic1, omic2, H1, H2, mu): +def _update_W(W, omic1, omic2, H1, H2, mu): # *** """Update the matrix containing contributions of individuals to latent components. @@ -63,8 +63,8 @@ def _update_W(W, omic1, omic2, H1, H2, mu): ------- Newly updated W. """ - omics = [omic1, omic2] - Hs = [H1, H2] + omics = [omic1, omic2] # *** + Hs = [H1, H2] # *** B = mu * np.identity(W.shape[1]) for j in range(len(Hs)): @@ -195,7 +195,7 @@ def _update_Beta(omic, h_k, y_k, gamma, Beta): return Beta -def _computeF( +def _computeF( # *** omic1, omic2, Y, @@ -248,9 +248,9 @@ def _computeF( Either the value of the objective function (i.e. the global loss) alone, or accompagned by each specific term to obtain it. """ - omics = [omic1, omic2] - Hs = [H1, H2] - Betas = [Beta1, Beta2] + omics = [omic1, omic2] # *** + Hs = [H1, H2] # *** + Betas = [Beta1, Beta2] # *** regul = (mu / 2) * np.trace(safe_sparse_dot(W.T, W)) distort, sparse, pred = ([] for i in range(3)) @@ -267,7 +267,7 @@ def _computeF( loss = sum(distort) + regul + sum(sparse) + sum(pred) if details: - res = sum([[loss], distort, sparse, [regul], pred], []) + res = sum([[loss], distort, sparse, [regul], pred], []) # *** return loss if details is False else res @@ -418,7 +418,7 @@ def _linesearch( return H, H_loss, current_eta -def _analytic_solver( +def _analytic_solver( # *** omic1, omic2, Y, @@ -508,15 +508,15 @@ def _analytic_solver( See (Fevotte, 2011) and sklearn.decomposition.NMF source code for choice of parameters. """ - omics = [omic1, omic2] - Hs = [H1, H2] - Betas = [Beta1, Beta2] - Hlosses = [H1_loss, H2_loss] - Hgrads = [H1_grad, H2_grad] - etas = [params["eta1"], params["eta2"]] + omics = [omic1, omic2] # *** + Hs = [H1, H2] # *** + Betas = [Beta1, Beta2] # *** + Hlosses = [H1_loss, H2_loss] # *** + Hgrads = [H1_grad, H2_grad] # *** + etas = [params["eta1"], params["eta2"]] # *** # W update with new values of Hs - W_new = _update_W(W, omics[0], omics[1], Hs[0], Hs[1], params["mu"]) + W_new = _update_W(W, omics[0], omics[1], Hs[0], Hs[1], params["mu"]) # *** # Hs updates (either MU or Proximal) Hs_new = [0] * len(omics) @@ -572,43 +572,35 @@ def _analytic_solver( # Betas updates with new values of Hs # Compute each regression coef. 
of each component separately # and gather them in a unique vector - Betas_new = [0] * len(omics) + Betas_new = [0 for j in range(len(omics))] for j in range(len(omics)): Betas_new[j] = np.array( [ _update_Beta( omics[j], - Hs_new[j][0, :], - Y[:, 0], + Hs_new[j][k, :], + Y[:, k], params["gamma"], - Betas[j][0] - ), - _update_Beta( - omics[j], - Hs_new[j][1, :], - Y[:, 1], - params["gamma"], - Betas[j][1] - ), + Betas[j][k] + ) for k in range(K) ] ) - # Put to zero all coefficients linked to components k when k > K_gp if K_gp < K: Betas_new[j][K_gp:] = 0 - H1_new = Hs_new[0] - H2_new = Hs_new[1] - Beta1_new = Betas_new[0] - Beta2_new = Betas_new[1] - params["eta1"] = etas[0] - params["eta2"] = etas[1] - H1_loss = Hlosses[0] - H2_loss = Hlosses[1] - H1_grad = Hgrads[0] - H2_grad = Hgrads[1] - - return ( + H1_new = Hs_new[0] # *** + H2_new = Hs_new[1] # *** + Beta1_new = Betas_new[0] # *** + Beta2_new = Betas_new[1] # *** + params["eta1"] = etas[0] # *** + params["eta2"] = etas[1] # *** + H1_loss = Hlosses[0] # *** + H2_loss = Hlosses[1] # *** + H1_grad = Hgrads[0] # *** + H2_grad = Hgrads[1] # *** + + return ( # *** W_new, H1_new, H2_new, @@ -842,12 +834,12 @@ class NMFProfiler: >>> model.barplot_error(height=6, width=15, path="") """ - def __init__( + def __init__( # *** self, omic1, omic2, y, - params={ + params={ # *** "gamma": 1e-2, "lambda": 1e-3, "mu": 1e-3, @@ -868,8 +860,8 @@ class NMFProfiler: verbose=False, ): - self.omic1 = omic1 - self.omic2 = omic2 + self.omic1 = omic1 # *** + self.omic2 = omic2 # *** self.y = y self.params = params self.init_method = init_method @@ -882,7 +874,7 @@ class NMFProfiler: self.seed = seed self.verbose = verbose - def __str__(self): + def __str__(self): # *** """Briefly describe inputs and outputs of NMFProfiler.""" print_statement = ( "NMFProfiler\n-----------\n\nAnalysis run on dataset 1 containing " @@ -916,20 +908,20 @@ class NMFProfiler: Apply a min-max normalization and divide by the square root of the number of features. """ - omics = [self.omic1, self.omic2] + omics = [self.omic1, self.omic2] # *** # For each dataset - for j in range(len(omics)): - for i in range(omics[j].shape[1]): - omics[j][:, i] = ( - omics[j][:, i] - np.min(omics[j][:, i]) + for j in range(len(omics)): # *** + for i in range(omics[j].shape[1]): # *** + omics[j][:, i] = ( # *** + omics[j][:, i] - np.min(omics[j][:, i]) # *** ) / ( - np.max(omics[j][:, i]) - np.min(omics[j][:, i]) + np.max(omics[j][:, i]) - np.min(omics[j][:, i]) # *** ) - omics[j] = omics[j] * (1 / np.sqrt(omics[j].shape[1])) + omics[j] = omics[j] * (1 / np.sqrt(omics[j].shape[1])) # *** - self.omic1 = omics[0] - self.omic2 = omics[1] + self.omic1 = omics[0] # *** + self.omic2 = omics[1] # *** def _initialize_w_h_beta(self): """Initialize matrices W, H^j and Beta^j. @@ -941,7 +933,7 @@ class NMFProfiler: ---- Based on _initialize_nmf of sklearn.decomposition.NMF. """ - omicstemp = [self.omic1, self.omic2] + omicstemp = [self.omic1, self.omic2] # *** # Extract K, the number of latent components, as equal to # the number of distinct groups in y @@ -952,9 +944,9 @@ class NMFProfiler: # For W, H1 and H2: # 1.0. Initialize H0s list - H0s = [0] * len(omicstemp) + H0s = [0 for j in range(len(omicstemp))] # *** # 1.1. Concatenate omics data sets - omics = np.concatenate(omicstemp, axis=1) + omics = np.concatenate(omicstemp, axis=1) # *** # 1.2. Initialize using sklearn function if self.init_method == "random2": # 1.2a. 
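For orientation, the generalized `_computeF` above evaluates, for J omics, a sum of per-omic distortion, sparsity and prediction terms plus a single ridge term on W. A minimal standalone sketch of that objective, assuming plain NumPy inputs (function and variable names here are illustrative, not the package API; `norm(Hj, 1)` simply mirrors the package's own call):

    import numpy as np
    from numpy.linalg import multi_dot, norm

    def global_loss(omics, Y, W, Hs, Betas, mu, lambd, gamma, K_gp):
        """J-omics NMFProfiler objective (illustrative sketch)."""
        regul = (mu / 2) * np.trace(W.T @ W)
        distort, sparse, pred = [], [], []
        for Xj, Hj, Bj in zip(omics, Hs, Betas):
            distort.append(0.5 * norm(Xj - W @ Hj) ** 2)
            sparse.append(lambd * norm(Hj, 1))  # mirrors the package's norm(H, 1)
            pred.append(
                (gamma / 2)
                * norm(Y - multi_dot([Xj, Hj.T[:, :K_gp], np.diag(Bj[:K_gp])])) ** 2
            )
        return sum(distort) + regul + sum(sparse) + sum(pred)

With this shape, supporting a third omic reduces to passing three-element `omics`, `Hs` and `Betas` lists; the loop body is unchanged, which is the aim of the whole patch series.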
Initialize W with both omics and Hj @@ -965,9 +957,9 @@ class NMFProfiler: init="random", random_state=self.seed ) - for j in range(len(omicstemp)): + for j in range(len(omicstemp)): # *** *_, H0s[j] = _initialize_nmf( - X=omicstemp[j], + X=omicstemp[j], # *** n_components=K, init="random", random_state=self.seed @@ -977,12 +969,12 @@ class NMFProfiler: # 1.2b. FOR IDENTICAL OMICS DATA SETS - Initialize W # with both omics, H1 with omic1 and H2 as H1 (random) W0, H0s[0] = _initialize_nmf( - X=omicstemp[0], + X=omicstemp[0], # *** n_components=K, init="random", random_state=self.seed ) - for j in range(1, len(omicstemp)): + for j in range(1, len(omicstemp)): # *** H0s[j] = H0s[0].copy() elif self.init_method == "nndsvd2": # 1.2c. Initialize W with both omics and Hj @@ -993,9 +985,9 @@ class NMFProfiler: init="nndsvd", random_state=self.seed ) - for j in range(len(omicstemp)): + for j in range(len(omicstemp)): # *** *_, H0s[j] = _initialize_nmf( - X=omicstemp[j], + X=omicstemp[j], # *** n_components=K, init="nndsvd", random_state=self.seed @@ -1004,15 +996,15 @@ class NMFProfiler: elif self.init_method == "nndsvd3": # 1.2d. Initialize W with each omic then take the mean and # initialize Hj with specific omic (nndsvd) - W_js = [0] * len(omicstemp) - for j in range(len(omicstemp)): + W_js = [0 for j in range(len(omics))] # *** + for j in range(len(omicstemp)): # *** W_js[j], H0s[j] = _initialize_nmf( - X=omicstemp[j], + X=omicstemp[j], # *** n_components=K, init="nndsvd", random_state=self.seed ) - W0 = (1 / len(omicstemp)) * (sum(W_js)) + W0 = (1 / len(omicstemp)) * (sum(W_js)) # *** del W_js else: # 1.2e. Initialize both with all omics, using whatever method @@ -1024,25 +1016,33 @@ class NMFProfiler: random_state=self.seed ) # 1.2e. Split H matrix + if len(omicstemp) == 2: + indices_or_sections = [omicstemp[0].shape[1]] # *** + else: + indices_or_sections = [ + omicstemp[j].shape[1] for j in range(len(omicstemp)) # *** + ] H0s = np.split( - ary=H0, indices_or_sections=[self.omic1.shape[1]], axis=1 + ary=H0, + indices_or_sections=indices_or_sections, + axis=1 ) del H0 # For Betas: - Beta0s = [0] * len(omicstemp) - for j in range(len(omicstemp)): + Beta0s = [0 for j in range(len(omics))] # *** + for j in range(len(omicstemp)): # *** Beta0s[j] = np.repeat(1, K) # Put to zero all coefficients linked to component k when k > K_gp if K_gp < K: Beta0s[j][K_gp:] = 0 - H1_0 = H0s[0] - H2_0 = H0s[1] - Beta1_0 = Beta0s[0] - Beta2_0 = Beta0s[1] + H1_0 = H0s[0] # *** + H2_0 = H0s[1] # *** + Beta1_0 = Beta0s[0] # *** + Beta2_0 = Beta0s[1] # *** - return W0, H1_0, H2_0, Beta1_0, Beta2_0 + return W0, H1_0, H2_0, Beta1_0, Beta2_0 # *** def fit(self): """Run NMFProfiler.""" @@ -1436,7 +1436,7 @@ class NMFProfiler: in each omic. :projs: list of arrays. Projection onto each Hj matrix """ - Hs = [self.H1, self.H2] + Hs = [self.H1, self.H2] # *** # Initialize projection list and compute projs = [] @@ -1475,19 +1475,22 @@ class NMFProfiler: ------ Return a barplot of the different error terms. 
""" - data_barplot = [ - ["reconstruction omic1", self.df_errors.iloc[-1, 1]], - ["reconstruction omic2", self.df_errors.iloc[-1, 2]], - ["l2 penalty on W", self.df_errors.iloc[-1, 5]], - ["l1 penalty on H1", self.df_errors.iloc[-1, 3]], - ["l1 penalty on H2", self.df_errors.iloc[-1, 4]], - ["supervised part omic1", self.df_errors.iloc[-1, 6]], - ["supervised part omic2", self.df_errors.iloc[-1, 7]], - ] + J = len([self.omic1, self.omic2]) # *** + data_barplot = np.array([ + [ + *[f"reconstruction omic{j+1}" for j in range(J)], + *[f"l1 penalty on H{j+1}" for j in range(J)], + "l2 penalty on W", + *[f"supervised part omic{j+1}" for j in range(J)], + ], + self.df_errors.iloc[-1, 1:-1].tolist(), + ]).T df_bar = pd.DataFrame(data_barplot, columns=["part", "value"]) + df_bar["value"] = df_bar["value"].astype(float) plt.figure(figsize=(width, height)) - sns.barplot(data=df_bar, x="part", y="value") + g = sns.barplot(data=df_bar, x="part", y="value") + g.set(xlabel=None) plt.savefig(str(path + "BarplotErrors.png")) plt.show() @@ -1543,8 +1546,8 @@ class NMFProfiler: """ plt.figure(figsize=(width, height)) - omics = [self.omic1, self.omic2] - Hs = [self.H1, self.H2] + omics = [self.omic1, self.omic2] # *** + Hs = [self.H1, self.H2] # *** if obj_to_viz == "W": sns.heatmap(pd.DataFrame(self.W), cmap="viridis") @@ -1562,7 +1565,9 @@ class NMFProfiler: if obj_to_viz == "W": plotname = str(path + obj_to_viz + "_Heatmap.png") else: - plotname = str(path + obj_to_viz + omic_number + "_Heatmap.png") + plotname = str( + path + obj_to_viz + str(omic_number) + "_Heatmap.png" + ) plt.savefig(plotname) plt.show() -- GitLab From acd854ab86f7d15ef3c8d51759d4a45a1c788a00 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Wed, 27 Nov 2024 16:37:54 +0100 Subject: [PATCH 12/19] adapted attributes --- nmfprofiler/nmfprofiler.py | 342 +++++++++++---------------- nmfprofiler/utils/validate_inputs.py | 29 ++- 2 files changed, 150 insertions(+), 221 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 2edeff6..0380bdd 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -39,7 +39,7 @@ def _prox_l1_pos(v, lambd): return np.maximum(v - lambd, 0) -def _update_W(W, omic1, omic2, H1, H2, mu): # *** +def _update_W(W, omics, Hs, mu): """Update the matrix containing contributions of individuals to latent components. @@ -63,9 +63,6 @@ def _update_W(W, omic1, omic2, H1, H2, mu): # *** ------- Newly updated W. """ - omics = [omic1, omic2] # *** - Hs = [H1, H2] # *** - B = mu * np.identity(W.shape[1]) for j in range(len(Hs)): B += safe_sparse_dot(Hs[j], Hs[j].T) @@ -195,15 +192,12 @@ def _update_Beta(omic, h_k, y_k, gamma, Beta): return Beta -def _computeF( # *** - omic1, - omic2, +def _computeF( + omics, Y, W, - H1, - H2, - Beta1, - Beta2, + Hs, + Betas, mu, lambdA, gamma, @@ -248,10 +242,6 @@ def _computeF( # *** Either the value of the objective function (i.e. the global loss) alone, or accompagned by each specific term to obtain it. 
""" - omics = [omic1, omic2] # *** - Hs = [H1, H2] # *** - Betas = [Beta1, Beta2] # *** - regul = (mu / 2) * np.trace(safe_sparse_dot(W.T, W)) distort, sparse, pred = ([] for i in range(3)) for j in range(len(omics)): @@ -267,7 +257,7 @@ def _computeF( # *** loss = sum(distort) + regul + sum(sparse) + sum(pred) if details: - res = sum([[loss], distort, sparse, [regul], pred], []) # *** + res = [loss, distort, sparse, regul, pred] return loss if details is False else res @@ -418,23 +408,18 @@ def _linesearch( return H, H_loss, current_eta -def _analytic_solver( # *** - omic1, - omic2, +def _analytic_solver( + omics, Y, W, - H1, - H2, - Beta1, - Beta2, + Hs, + Betas, params, as_sklearn, backtrack, max_iter_back, - H1_loss, - H2_loss, - H1_grad, - H2_grad, + Hlosses, + Hgrads, K, K_gp, verbose, @@ -508,18 +493,13 @@ def _analytic_solver( # *** See (Fevotte, 2011) and sklearn.decomposition.NMF source code for choice of parameters. """ - omics = [omic1, omic2] # *** - Hs = [H1, H2] # *** - Betas = [Beta1, Beta2] # *** - Hlosses = [H1_loss, H2_loss] # *** - Hgrads = [H1_grad, H2_grad] # *** etas = [params["eta1"], params["eta2"]] # *** # W update with new values of Hs - W_new = _update_W(W, omics[0], omics[1], Hs[0], Hs[1], params["mu"]) # *** + W_new = _update_W(W, omics, Hs, params["mu"]) # Hs updates (either MU or Proximal) - Hs_new = [0] * len(omics) + Hs_new = [0 for j in range(len(omics))] for j in range(len(omics)): Hs_new[j], Hgrads[j] = _update_H( Hs[j], @@ -589,29 +569,17 @@ def _analytic_solver( # *** if K_gp < K: Betas_new[j][K_gp:] = 0 - H1_new = Hs_new[0] # *** - H2_new = Hs_new[1] # *** - Beta1_new = Betas_new[0] # *** - Beta2_new = Betas_new[1] # *** params["eta1"] = etas[0] # *** params["eta2"] = etas[1] # *** - H1_loss = Hlosses[0] # *** - H2_loss = Hlosses[1] # *** - H1_grad = Hgrads[0] # *** - H2_grad = Hgrads[1] # *** - return ( # *** + return ( W_new, - H1_new, - H2_new, - Beta1_new, - Beta2_new, - params["eta1"], - params["eta2"], - H1_loss, - H2_loss, - H1_grad, - H2_grad, + Hs_new, + Betas_new, + params["eta1"], # *** + params["eta2"], # *** + Hlosses, + Hgrads, ) @@ -834,10 +802,9 @@ class NMFProfiler: >>> model.barplot_error(height=6, width=15, path="") """ - def __init__( # *** + def __init__( self, - omic1, - omic2, + omics, y, params={ # *** "gamma": 1e-2, @@ -860,8 +827,7 @@ class NMFProfiler: verbose=False, ): - self.omic1 = omic1 # *** - self.omic2 = omic2 # *** + self.omics = omics self.y = y self.params = params self.init_method = init_method @@ -874,18 +840,18 @@ class NMFProfiler: self.seed = seed self.verbose = verbose - def __str__(self): # *** + def __str__(self): """Briefly describe inputs and outputs of NMFProfiler.""" print_statement = ( - "NMFProfiler\n-----------\n\nAnalysis run on dataset 1 containing " - f"{self.omic1.shape[1]} features measured on {self.omic1.shape[0]}" - f" samples,\ndataset 2 containing {self.omic2.shape[1]} features " - f"measured on the same {self.omic2.shape[0]} samples.\nSamples " - f"are splitted into {len(np.unique(self.y))} distinct groups.\n\n" - f"NMFProfiler (as_sklearn = {self.as_sklearn}) extracted " - f"{len(np.unique(self.y))} profiles in {self.runningtime} " - "seconds.\n\nFor more information, please refer to help(), " - "GitLab page or contact aurelie.mercadie@inrae.fr." + f"NMFProfiler\n-----------\n\nAnalysis run on {len(self.omics)} " + "datasets containing respectively " + f"{*[self.omics[j].shape[1] for j in range(len(self.omics))],} " + f"features measured on the same {self.omics[0].shape[0]} samples." 
+ f"\nSamples are splitted into {len(np.unique(self.y))} distinct" + f" groups.\n\nNMFProfiler (as_sklearn = {self.as_sklearn}) " + f"extracted {len(np.unique(self.y))} profiles " + f"in {self.runningtime} seconds.\n\nFor more information, please " + "refer to help() or GitLab page." ) return print_statement @@ -908,20 +874,17 @@ class NMFProfiler: Apply a min-max normalization and divide by the square root of the number of features. """ - omics = [self.omic1, self.omic2] # *** - # For each dataset - for j in range(len(omics)): # *** - for i in range(omics[j].shape[1]): # *** - omics[j][:, i] = ( # *** - omics[j][:, i] - np.min(omics[j][:, i]) # *** + for j in range(len(self.omics)): + for i in range(self.omics[j].shape[1]): + self.omics[j][:, i] = ( + self.omics[j][:, i] - np.min(self.omics[j][:, i]) ) / ( - np.max(omics[j][:, i]) - np.min(omics[j][:, i]) # *** + np.max(self.omics[j][:, i]) - np.min(self.omics[j][:, i]) ) - omics[j] = omics[j] * (1 / np.sqrt(omics[j].shape[1])) # *** - - self.omic1 = omics[0] # *** - self.omic2 = omics[1] # *** + self.omics[j] = ( + self.omics[j] * (1 / np.sqrt(self.omics[j].shape[1])) + ) def _initialize_w_h_beta(self): """Initialize matrices W, H^j and Beta^j. @@ -933,8 +896,6 @@ class NMFProfiler: ---- Based on _initialize_nmf of sklearn.decomposition.NMF. """ - omicstemp = [self.omic1, self.omic2] # *** - # Extract K, the number of latent components, as equal to # the number of distinct groups in y K = len(np.unique(self.y)) @@ -944,9 +905,9 @@ class NMFProfiler: # For W, H1 and H2: # 1.0. Initialize H0s list - H0s = [0 for j in range(len(omicstemp))] # *** + H0s = [0 for j in range(len(self.omics))] # 1.1. Concatenate omics data sets - omics = np.concatenate(omicstemp, axis=1) # *** + omics = np.concatenate(self.omics, axis=1) # 1.2. Initialize using sklearn function if self.init_method == "random2": # 1.2a. Initialize W with both omics and Hj @@ -957,9 +918,9 @@ class NMFProfiler: init="random", random_state=self.seed ) - for j in range(len(omicstemp)): # *** + for j in range(len(self.omics)): *_, H0s[j] = _initialize_nmf( - X=omicstemp[j], # *** + X=self.omics[j], n_components=K, init="random", random_state=self.seed @@ -969,12 +930,12 @@ class NMFProfiler: # 1.2b. FOR IDENTICAL OMICS DATA SETS - Initialize W # with both omics, H1 with omic1 and H2 as H1 (random) W0, H0s[0] = _initialize_nmf( - X=omicstemp[0], # *** + X=self.omics[0], n_components=K, init="random", random_state=self.seed ) - for j in range(1, len(omicstemp)): # *** + for j in range(1, len(self.omics)): H0s[j] = H0s[0].copy() elif self.init_method == "nndsvd2": # 1.2c. Initialize W with both omics and Hj @@ -985,9 +946,9 @@ class NMFProfiler: init="nndsvd", random_state=self.seed ) - for j in range(len(omicstemp)): # *** + for j in range(len(self.omics)): *_, H0s[j] = _initialize_nmf( - X=omicstemp[j], # *** + X=self.omics[j], n_components=K, init="nndsvd", random_state=self.seed @@ -996,15 +957,15 @@ class NMFProfiler: elif self.init_method == "nndsvd3": # 1.2d. Initialize W with each omic then take the mean and # initialize Hj with specific omic (nndsvd) - W_js = [0 for j in range(len(omics))] # *** - for j in range(len(omicstemp)): # *** + W_js = [0 for j in range(len(self.omics))] + for j in range(len(self.omics)): W_js[j], H0s[j] = _initialize_nmf( - X=omicstemp[j], # *** + X=self.omics[j], n_components=K, init="nndsvd", random_state=self.seed ) - W0 = (1 / len(omicstemp)) * (sum(W_js)) # *** + W0 = (1 / len(self.omics)) * (sum(W_js)) del W_js else: # 1.2e. 
Initialize both with all omics, using whatever method @@ -1016,11 +977,11 @@ class NMFProfiler: random_state=self.seed ) # 1.2e. Split H matrix - if len(omicstemp) == 2: - indices_or_sections = [omicstemp[0].shape[1]] # *** + if len(self.omics) == 2: + indices_or_sections = [self.omics[0].shape[1]] else: indices_or_sections = [ - omicstemp[j].shape[1] for j in range(len(omicstemp)) # *** + self.omics[j].shape[1] for j in range(len(self.omics)) ] H0s = np.split( ary=H0, @@ -1030,19 +991,14 @@ class NMFProfiler: del H0 # For Betas: - Beta0s = [0 for j in range(len(omics))] # *** - for j in range(len(omicstemp)): # *** + Beta0s = [0 for j in range(len(self.omics))] + for j in range(len(self.omics)): Beta0s[j] = np.repeat(1, K) # Put to zero all coefficients linked to component k when k > K_gp if K_gp < K: Beta0s[j][K_gp:] = 0 - H1_0 = H0s[0] # *** - H2_0 = H0s[1] # *** - Beta1_0 = Beta0s[0] # *** - Beta2_0 = Beta0s[1] # *** - - return W0, H1_0, H2_0, Beta1_0, Beta2_0 # *** + return W0, H0s, Beta0s def fit(self): """Run NMFProfiler.""" @@ -1063,19 +1019,12 @@ class NMFProfiler: # Pre-process datasets (with min-max and number of features) self._preprocess_data() - omics = [self.omic1, self.omic2] # *** - # Initialize matrices - W0, H1_0, H2_0, Beta1_0, Beta2_0 = self._initialize_w_h_beta() # *** - - Hs_0 = [H1_0, H2_0] # *** - Betas_0 = [Beta1_0, Beta2_0] # *** + W0, Hs_0, Betas_0 = self._initialize_w_h_beta() self.W_init = W0 - self.H1_init = Hs_0[0] # *** - self.H2_init = Hs_0[1] # *** - self.Beta1_init = Betas_0[0] # *** - self.Beta2_init = Betas_0[1] # *** + self.Hs_init = Hs_0 + self.Betas_init = Betas_0 # Update lambda and gamma given sample size self._update_params() @@ -1086,8 +1035,8 @@ class NMFProfiler: # Create matrices and vectors to update using initialization # (and keep intact initialized matrices and vectors) W = deepcopy(W0) - Hs = [deepcopy(Hs_0[j]) for j in range(len(omics))] # *** - Betas = [deepcopy(Betas_0[j]) for j in range(len(omics))] # *** + Hs = [deepcopy(Hs_0[j]) for j in range(len(self.omics))] + Betas = [deepcopy(Betas_0[j]) for j in range(len(self.omics))] # Create lists of error terms to enrich iteratively (error, regul, nb_iters) = ([] for i in range(3)) @@ -1097,7 +1046,7 @@ class NMFProfiler: distorts, sparsitys, preds, - ) = ([[] for j in range(len(omics))] for _ in range(5)) # *** + ) = ([[] for j in range(len(self.omics))] for _ in range(5)) # Create lists of LDA performance indicators to enrich iteratively ( @@ -1105,18 +1054,15 @@ class NMFProfiler: BICs, AICs, F_pvals, - ) = ([[] for j in range(len(omics))] for _ in range(4)) # *** - bets = [[[] for k in range(K)] for j in range(len(omics))] # *** + ) = ([[] for j in range(len(self.omics))] for _ in range(4)) + bets = [[[] for k in range(K)] for j in range(len(self.omics))] - loss_init = _computeF( # *** - omics[0], - omics[1], + loss_init = _computeF( + self.omics, Y, W0, - Hs_0[0], - Hs_0[1], - Betas_0[0], - Betas_0[1], + Hs_0, + Betas_0, self.params["mu"], self.params["lambda"], self.params["gamma"], @@ -1125,20 +1071,18 @@ class NMFProfiler: ) error.append(loss_init[0]) nb_iters.append(0) - distorts[0].append(loss_init[1]) # *** - distorts[1].append(loss_init[2]) # *** - sparsitys[0].append(loss_init[3]) # *** - sparsitys[1].append(loss_init[4]) # *** - regul.append(loss_init[5]) # *** - preds[0].append(loss_init[6]) # *** - preds[1].append(loss_init[7]) # *** + regul.append(loss_init[3]) + for j in range(len(self.omics)): + distorts[j].append(loss_init[1][j]) + sparsitys[j].append(loss_init[2][j]) 
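# A hedged aside on the H-matrix split above (a sketch, not part of the
# committed code, and not verified against the package): np.split expects
# *cumulative* column indices, so building `indices_or_sections` from the raw
# feature counts would over-split H0 once more than two omics are passed.
# A cumulative variant covering any number of omics, assuming `self.omics`
# as defined in this class:
widths = [omic.shape[1] for omic in self.omics]
indices_or_sections = list(np.cumsum(widths)[:-1])  # e.g. widths [10, 20, 30] -> split points [10, 30]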
+ preds[j].append(loss_init[4][j]) # Keep track of marginal objective functions in Hj with # initial matrices (necessary for linesearch first execution) - for j in range(len(omics)): # *** + for j in range(len(self.omics)): margHs[j].append( _computeMargH( - omics[j], + self.omics[j], Y, W0, Hs_0[j], @@ -1150,13 +1094,14 @@ class NMFProfiler: ) # LDAs with initial matrices - regs_init = [0 for j in range(len(omics))] # *** - for j in range(len(omics)): # *** + regs_init = [0 for j in range(len(self.omics))] + for j in range(len(self.omics)): regs_init[j] = sm.OLS( - self.y, sm.add_constant(safe_sparse_dot(omics[j], Hs_0[j].T)) + self.y, + sm.add_constant(safe_sparse_dot(self.omics[j], Hs_0[j].T)) ).fit() - for j in range(len(omics)): # *** + for j in range(len(self.omics)): for k in range(K): bets[j][k].append(regs_init[j].params[k+1]) R2adjs[j].append(regs_init[j].rsquared_adj) @@ -1170,8 +1115,8 @@ class NMFProfiler: # To keep track of the choice of etas etas_init = [self.params["eta1"], self.params["eta2"]] # *** - etas = [[] for j in range(len(omics))] # *** - for j in range(len(omics)): # *** + etas = [[] for j in range(len(self.omics))] + for j in range(len(self.omics)): etas[j].append(etas_init[j]) # Begin the optimization @@ -1182,35 +1127,26 @@ class NMFProfiler: # Analytical solver... if self.solver == "analytical": - ( # *** + ( W, - Hs[0], - Hs[1], - Betas[0], - Betas[1], + Hs, + Betas, self.params["eta1"], self.params["eta2"], - margHs[0], - margHs[1], - gradHs[0], - gradHs[1], + margHs, + gradHs, ) = _analytic_solver( - omics[0], - omics[1], + self.omics, Y, W, - Hs[0], - Hs[1], - Betas[0], - Betas[1], + Hs, + Betas, self.params, as_sklearn=self.as_sklearn, backtrack=self.backtrack, max_iter_back=self.max_iter_back, - H1_loss=margHs[0], - H2_loss=margHs[1], - H1_grad=gradHs[0], - H2_grad=gradHs[1], + Hlosses=margHs, + Hgrads=gradHs, K=K, K_gp=K_gp, verbose=self.verbose, @@ -1218,7 +1154,7 @@ class NMFProfiler: # ... 
or Autograd solver else: - W, Hs[0], Hs[1], Betas[0], Betas[1] = _autograd_solver() # *** + W, Hs, Betas = _autograd_solver() # Keep track of optimal etas etas_current = [self.params["eta1"], self.params["eta2"]] # *** @@ -1226,15 +1162,12 @@ class NMFProfiler: etas[j].append(etas_current[j]) # *** # Compute the new loss as well as specific terms - loss_ = _computeF( # *** - omics[0], - omics[1], + loss_ = _computeF( + self.omics, Y, W, - Hs[0], - Hs[1], - Betas[0], - Betas[1], + Hs, + Betas, self.params["mu"], self.params["lambda"], self.params["gamma"], @@ -1243,22 +1176,21 @@ class NMFProfiler: ) error.append(loss_[0]) nb_iters.append(n_iter) - distorts[0].append(loss_[1]) # *** - distorts[1].append(loss_[2]) # *** - sparsitys[0].append(loss_[3]) # *** - sparsitys[1].append(loss_[4]) # *** - regul.append(loss_[5]) # *** - preds[0].append(loss_[6]) # *** - preds[1].append(loss_[7]) # *** + regul.append(loss_[3]) + for j in range(len(self.omics)): + distorts[j].append(loss_[1][j]) + sparsitys[j].append(loss_[2][j]) + preds[j].append(loss_[4][j]) # Monitor the LDA part - regs = [0 for j in range(len(omics))] # *** - for j in range(len(omics)): # *** + regs = [0 for j in range(len(self.omics))] + for j in range(len(self.omics)): regs[j] = sm.OLS( - self.y, sm.add_constant(safe_sparse_dot(omics[j], Hs[j].T)) + self.y, + sm.add_constant(safe_sparse_dot(self.omics[j], Hs[j].T)) ).fit() - for j in range(len(omics)): + for j in range(len(self.omics)): for k in range(K): bets[j][k].append(regs[j].params[k+1]) R2adjs[j].append(regs[j].rsquared_adj) @@ -1309,15 +1241,13 @@ class NMFProfiler: # -------------------------------- self.n_iter = n_iter self.W = W - self.H1 = Hs[0] # *** - self.H2 = Hs[1] # *** - self.Beta1 = Betas[0] # *** - self.Beta2 = Betas[1] # *** + self.Hs = Hs + self.Betas = Betas # Keep track of final Hj gradients and build the Hj gradients matrix # (following up their evolution during optimization) # ------------------------------------------------------------------ - for j in range(len(omics)): # *** + for j in range(len(self.omics)): gradHs[j].append( multi_dot([W.T, W, Hs[j]]) + ( @@ -1327,34 +1257,36 @@ class NMFProfiler: np.transpose(Betas[j][newaxis]), Betas[j][newaxis], Hs[j], - omics[j].T, - omics[j], + self.omics[j].T, + self.omics[j], ] ) ) - ( - safe_sparse_dot(W.T, omics[j]) + safe_sparse_dot(W.T, self.omics[j]) + ( self.params["gamma"] * multi_dot( [ np.transpose(Betas[j][newaxis]), self.y[newaxis], - omics[j] + self.omics[j] ] ) ) ) ) - grads = np.array( # *** + grads = np.array( [ - [norm(i) ** 2 for i in gradHs[j]] for j in range(len(omics)) + [ + norm(i) ** 2 for i in gradHs[j] + ] for j in range(len(self.omics)) ] ).T self.df_grads = pd.DataFrame( grads, - columns=[f'grad_H{j+1}' for j in range(len(omics))] # *** + columns=[f'grad_H{j+1}' for j in range(len(self.omics))] ) # Build the error terms matrix @@ -1373,10 +1305,10 @@ class NMFProfiler: error_terms, columns=list(itertools.chain.from_iterable([ ["iter"], - [f"distort{j+1}" for j in range(len(omics))], # *** - [f"sparsity{j+1}" for j in range(len(omics))], # *** + [f"distort{j+1}" for j in range(len(self.omics))], + [f"sparsity{j+1}" for j in range(len(self.omics))], ["regul"], - [f"pred{j+1}" for j in range(len(omics))], # *** + [f"pred{j+1}" for j in range(len(self.omics))], ["loss"], ])), ) @@ -1384,7 +1316,7 @@ class NMFProfiler: # Build the LDA performance matrix # -------------------------------- LDA_perf = np.vstack(nb_iters) - for j in range(len(omics)): + for j in range(len(self.omics)): bets_j 
= np.array(bets[j]).T perf_j = np.array( [ @@ -1407,7 +1339,7 @@ class NMFProfiler: f"BIC (omic{j+1})", f"AIC (omic{j+1})", f"F-statistic p-value (omic{j+1})" - ] for j in range(len(omics))])), # *** + ] for j in range(len(self.omics))])), ])), ) @@ -1416,7 +1348,7 @@ class NMFProfiler: # ------------------------------------------------- self.df_etas = pd.DataFrame( np.array(etas).T, - columns=[f"eta_omic{j+1}" for j in range(len(omics))] # *** + columns=[f"eta_omic{j+1}" for j in range(len(self.omics))] ) return self @@ -1436,8 +1368,6 @@ class NMFProfiler: in each omic. :projs: list of arrays. Projection onto each Hj matrix """ - Hs = [self.H1, self.H2] # *** - # Initialize projection list and compute projs = [] for j in len(new_ind): @@ -1445,7 +1375,7 @@ class NMFProfiler: new_ind_Xj = new_ind[j][newaxis] # Compute projections of the new samples onto dictionary matrices - projs.append(safe_sparse_dot(new_ind_Xj, Hs[j].T)) + projs.append(safe_sparse_dot(new_ind_Xj, self.Hs[j].T)) # For each omic, find which component gave the highest score group = [projs[j].argmax() for j in len(new_ind)] @@ -1475,7 +1405,7 @@ class NMFProfiler: ------ Return a barplot of the different error terms. """ - J = len([self.omic1, self.omic2]) # *** + J = len(self.omics) data_barplot = np.array([ [ *[f"reconstruction omic{j+1}" for j in range(J)], @@ -1546,15 +1476,15 @@ class NMFProfiler: """ plt.figure(figsize=(width, height)) - omics = [self.omic1, self.omic2] # *** - Hs = [self.H1, self.H2] # *** - if obj_to_viz == "W": sns.heatmap(pd.DataFrame(self.W), cmap="viridis") elif obj_to_viz == "omic": - sns.heatmap(pd.DataFrame(omics[(omic_number-1)]), cmap="viridis") + sns.heatmap( + pd.DataFrame(self.omics[(omic_number-1)]), + cmap="viridis" + ) elif obj_to_viz == "H": - sns.heatmap(pd.DataFrame(Hs[(omic_number-1)]), cmap="viridis") + sns.heatmap(pd.DataFrame(self.Hs[(omic_number-1)]), cmap="viridis") else: raise Exception( "Cannot plot this object, please change 'obj_to_viz' input." diff --git a/nmfprofiler/utils/validate_inputs.py b/nmfprofiler/utils/validate_inputs.py index e817e12..51a94ef 100644 --- a/nmfprofiler/utils/validate_inputs.py +++ b/nmfprofiler/utils/validate_inputs.py @@ -22,7 +22,7 @@ def _check_modalities(input, modalities): def _check_inputs_NMFProfiler(X): """Check inputs of NMFProfiler (i.e. omic1, omic2, and so on). """ - params_names = [ + params_names = [ # *** "alpha", "eta1", "eta2", @@ -43,20 +43,19 @@ def _check_inputs_NMFProfiler(X): 'nndsvdar' ] - omics = [X.omic1, X.omic2] - for j in range(len(omics)): - if _check_type(omics[j], np.ndarray) is False: + for j in range(len(X.omics)): + if _check_type(X.omics[j], np.ndarray) is False: raise TypeError( - f"Received {type(omics[j])}, " + f"Received {type(X.omics[j])}, " f"item number {j+1} in 'omics' (i.e. omic{j+1}) " "should be a numpy.ndarray." ) - if omics[0].shape[0] != omics[j].shape[0]: # check dimensions + if X.omics[0].shape[0] != X.omics[j].shape[0]: # check dimensions raise Exception( "All datasets should have the exact same samples, but " - f"omic1 (item 1 in 'omics') has {omics[0].shape[0]} samples " + f"omic1 (item 1 in 'omics') has {X.omics[0].shape[0]} samples " f"and omic{j+1} (item {j+1} in 'omics') has " - f"{omics[j].shape[0]} samples. Please check datasets." + f"{X.omics[j].shape[0]} samples. Please check datasets." ) if _check_type(X.y, np.ndarray) is False: @@ -64,10 +63,10 @@ def _check_inputs_NMFProfiler(X): f"Received {type(X.y)}, " "'y' should be a 1D numpy.ndarray." 
) - if X.y.shape[0] != omics[0].shape[0]: # check dimensions + if X.y.shape[0] != X.omics[0].shape[0]: # check dimensions raise Exception( f"Group vector y has {X.y.shape[0]} samples but " - f"omic1 (item 1 in 'omics') has {omics[0].shape[0]} samples. " + f"omic1 (item 1 in 'omics') has {X.omics[0].shape[0]} samples. " "Please check datasets." ) if len(np.unique(X.y)) < 2: # check y values @@ -85,7 +84,7 @@ def _check_inputs_NMFProfiler(X): "One or more essential key-value pair(s) missing. " "See documentation." ) - if len(X.params) > 8: + if len(X.params) > 8: # *** raise ValueError( "Extra key-value pair(s), 'params' should be of length 8. " "See documentation." @@ -96,11 +95,11 @@ def _check_inputs_NMFProfiler(X): "'alpha' should be a positive integer." ) - etas = [X.params['eta1'], X.params['eta2']] - for j in range(len(etas)): - if _check_type(etas[j], float) is False or etas[j] <= 0: + etas = [X.params['eta1'], X.params['eta2']] # *** + for j in range(len(etas)): # *** + if _check_type(etas[j], float) is False or etas[j] <= 0: # *** raise TypeError( - f"Received {etas[j]} {type(etas[j])}, " + f"Received {etas[j]} {type(etas[j])}, " # *** f"eta {j+1} (item {j+1} in 'params['etas']') " "should be a positive float." ) -- GitLab From ff567aca6974e13bd46ea5b7479aff5a321120cf Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Wed, 27 Nov 2024 16:46:04 +0100 Subject: [PATCH 13/19] adapted attributes --- tests/test_nmfprofiler.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_nmfprofiler.py b/tests/test_nmfprofiler.py index c096c55..ab5bde3 100644 --- a/tests/test_nmfprofiler.py +++ b/tests/test_nmfprofiler.py @@ -662,7 +662,7 @@ seed = 240709 def test_nmfprofiler_mu(): - model = NMFProfiler(omic1=omic1, omic2=omic2, y=y, seed=seed) + model = NMFProfiler(omics=[omic1, omic2], y=y, seed=seed) res = model.fit() assert ( np.round(res.df_errors.iloc[-1, -1], decimals=4) == 0.9604 @@ -671,8 +671,7 @@ def test_nmfprofiler_mu(): def test_nmfprofiler_prox(): model = NMFProfiler( - omic1=omic1, - omic2=omic2, + omics=[omic1, omic2], y=y, seed=seed, as_sklearn=False, -- GitLab From 1e8bbd46ee6190173e5ee4c45972289803a54c74 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Wed, 27 Nov 2024 16:50:05 +0100 Subject: [PATCH 14/19] updated README given attribute changes --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 644ae73..b87d9e2 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,7 @@ seed = 240820 # Run NMFProfiler model = NMFProfiler( - omic1=ToyExample().omic1, - omic2=ToyExample().omic2, + omics=[ToyExample().omic1, ToyExample().omic2], y=ToyExample().y, seed=seed, as_sklearn=False, @@ -42,8 +41,8 @@ print(res) # Visualize analyzed datasets (samples x features) ToyExample().y # 2 groups -res.heatmap(obj_to_viz="omic1", width=15, height=6, path="") -res.heatmap(obj_to_viz="omic2", width=15, height=6, path="") +res.heatmap(obj_to_viz="omic", width=15, height=6, path="", omic_number=1) +res.heatmap(obj_to_viz="omic", width=15, height=6, path="", omic_number=2) ```  @@ -60,8 +59,8 @@ res.heatmap(obj_to_viz="W", width=10, height=10, path="") ```python # Visualize signature matrices H1 and H2 obtained (2 x features) -res.heatmap(obj_to_viz="H1", width=15, height=6, path="") -res.heatmap(obj_to_viz="H2", width=15, height=6, path="") +res.heatmap(obj_to_viz="H", width=15, height=6, path="", omic_number=1) +res.heatmap(obj_to_viz="H", 
width=15, height=6, path="", omic_number=2) ```  -- GitLab From 2c8a631db23a83ae2c7de5fc4de69972f128f193 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Wed, 27 Nov 2024 17:44:56 +0100 Subject: [PATCH 15/19] reduced set of params + adapted regarding eta(s) --- nmfprofiler/nmfprofiler.py | 34 ++++++++++++---------------- nmfprofiler/utils/validate_inputs.py | 24 ++++++++------------ 2 files changed, 24 insertions(+), 34 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 0380bdd..f1c092e 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -415,6 +415,7 @@ def _analytic_solver( Hs, Betas, params, + current_etas, as_sklearn, backtrack, max_iter_back, @@ -455,6 +456,7 @@ def _analytic_solver( `alpha`, `sigma`, `m_back` for linesearch(). By default, gamma = 0.005, lambda = 1e-3, mu = 1e-3, eta1 = eta2 = 1, alpha = 2, sigma = 1e-9 and m_back = 1. + :current_etas: DESCRIBE. :as_sklearn: boolean, whether or not a modified version of the MU updates from scikit-learn is used. :backtrack: boolean, if Backtrack LineSearch is performed or not. @@ -493,8 +495,6 @@ def _analytic_solver( See (Fevotte, 2011) and sklearn.decomposition.NMF source code for choice of parameters. """ - etas = [params["eta1"], params["eta2"]] # *** - # W update with new values of Hs W_new = _update_W(W, omics, Hs, params["mu"]) @@ -508,7 +508,7 @@ def _analytic_solver( Betas[j], Y, params["gamma"], - etas[j], + current_etas[j], params["lambda"], as_sklearn=as_sklearn, grad=True, @@ -530,7 +530,7 @@ def _analytic_solver( K_gp ) ) - Hs_new[j], Hlosses[j], etas[j] = _linesearch( + Hs_new[j], Hlosses[j], current_etas[j] = _linesearch( H_old=Hs[j], H=Hs_new[j], W=W_new, @@ -539,7 +539,7 @@ def _analytic_solver( Y=Y, gamma=params["gamma"], lambdA=params["lambda"], - current_eta=etas[j], + current_eta=current_etas[j], H_loss=Hlosses[j], alpha=params["alpha"], sigma=params["sigma"], @@ -569,15 +569,11 @@ def _analytic_solver( if K_gp < K: Betas_new[j][K_gp:] = 0 - params["eta1"] = etas[0] # *** - params["eta2"] = etas[1] # *** - return ( W_new, Hs_new, Betas_new, - params["eta1"], # *** - params["eta2"], # *** + current_etas, Hlosses, Hgrads, ) @@ -806,12 +802,11 @@ class NMFProfiler: self, omics, y, - params={ # *** + params={ "gamma": 1e-2, "lambda": 1e-3, "mu": 1e-3, - "eta1": 1.00, - "eta2": 1.00, + "eta": 1.00, "alpha": 2, "sigma": 1e-9, "m_back": 1, @@ -1114,7 +1109,7 @@ class NMFProfiler: print("Error after initialization step: %f" % (error[0])) # To keep track of the choice of etas - etas_init = [self.params["eta1"], self.params["eta2"]] # *** + etas_init = [self.params["eta"] for j in range(len(self.omics))] etas = [[] for j in range(len(self.omics))] for j in range(len(self.omics)): etas[j].append(etas_init[j]) @@ -1131,8 +1126,7 @@ class NMFProfiler: W, Hs, Betas, - self.params["eta1"], - self.params["eta2"], + current_etas, margHs, gradHs, ) = _analytic_solver( @@ -1142,6 +1136,9 @@ class NMFProfiler: Hs, Betas, self.params, + current_etas=[ + etas[j][-1] for j in range(len(self.omics)) + ], as_sklearn=self.as_sklearn, backtrack=self.backtrack, max_iter_back=self.max_iter_back, @@ -1157,9 +1154,8 @@ class NMFProfiler: W, Hs, Betas = _autograd_solver() # Keep track of optimal etas - etas_current = [self.params["eta1"], self.params["eta2"]] # *** - for j in range(len(etas_current)): # *** - etas[j].append(etas_current[j]) # *** + for j in range(len(self.omics)): + etas[j].append(current_etas[j]) # Compute the new loss as well as 
specific terms loss_ = _computeF( diff --git a/nmfprofiler/utils/validate_inputs.py b/nmfprofiler/utils/validate_inputs.py index 51a94ef..7e221e8 100644 --- a/nmfprofiler/utils/validate_inputs.py +++ b/nmfprofiler/utils/validate_inputs.py @@ -22,10 +22,9 @@ def _check_modalities(input, modalities): def _check_inputs_NMFProfiler(X): """Check inputs of NMFProfiler (i.e. omic1, omic2, and so on). """ - params_names = [ # *** + params_names = [ "alpha", - "eta1", - "eta2", + "eta", "gamma", "lambda", "m_back", @@ -84,9 +83,9 @@ def _check_inputs_NMFProfiler(X): "One or more essential key-value pair(s) missing. " "See documentation." ) - if len(X.params) > 8: # *** + if len(X.params) > 7: raise ValueError( - "Extra key-value pair(s), 'params' should be of length 8. " + "Extra key-value pair(s), 'params' should be of length 7. " "See documentation." ) if _check_type(X.params['alpha'], int) is False or X.params['alpha'] < 1: @@ -94,16 +93,11 @@ def _check_inputs_NMFProfiler(X): f"Received {X.params['alpha']} {type(X.params['alpha'])}, " "'alpha' should be a positive integer." ) - - etas = [X.params['eta1'], X.params['eta2']] # *** - for j in range(len(etas)): # *** - if _check_type(etas[j], float) is False or etas[j] <= 0: # *** - raise TypeError( - f"Received {etas[j]} {type(etas[j])}, " # *** - f"eta {j+1} (item {j+1} in 'params['etas']') " - "should be a positive float." - ) - + if _check_type(X.params['eta'], float) is False or X.params['eta'] <= 0: + raise TypeError( + f"Received {X.params['eta']} {type(X.params['eta'])}, " + "'eta' should be a positive float." + ) if _check_type(X.params['gamma'], float) is False or X.params['gamma'] < 0: raise TypeError( f"Received {X.params['gamma']} {type(X.params['gamma'])}, " -- GitLab From 783b65df0eceb75f815d356b11bfc316de1e0419 Mon Sep 17 00:00:00 2001 From: cebrouard <celinebrouard@MacBook-Pro-de-cebrouard.local> Date: Thu, 28 Nov 2024 14:17:18 +0100 Subject: [PATCH 16/19] changed all documentations for more than 2 omics --- nmfprofiler/nmfprofiler.py | 179 ++++++++++++++----------------------- 1 file changed, 66 insertions(+), 113 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index f1c092e..bdf781e 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -47,15 +47,10 @@ def _update_W(W, omics, Hs, mu): ---------- :W: ndarray of shape (n_samples x K), contributions of individuals to latent components. - :omic1: ndarray of shape (n_samples x n_features_omic1), values of - features from (omic1) measured on (n_samples). - :omic2: ndarray of shape (n_samples x n_features_omic2), values of - features from (omic2) measured on the same (n_samples) samples as - (omic1). - :H1: ndarray of shape (K x n_features_omic1), latent components - built on (omic1). - :H2: ndarray of shape (K x n_features_omic2), latent components built on - (omic2). + :omics: list of (n_omics) ndarrays of shape (n_samples x n_features_omicj), values of + features from (omicj) measured on the same (n_samples) samples. + :Hs: list of (n_omics) ndarrays of shape (K x n_features_omicj), latent components + built on (omicj). :mu: float, value for parameter `mu` from objective function. @@ -87,7 +82,7 @@ def _update_H( H_grad=None ): """Update the matrix containing latent components built on omic j. 
- (j referring to the omic dataset number, either 1 or 2) + (j referring to the omic dataset number, between 1 and J) Parameters ---------- @@ -156,7 +151,7 @@ def _update_H( def _update_Beta(omic, h_k, y_k, gamma, Beta): """Update the regression coefficients linked to omic j and component k. - (j referring to the omic dataset number, either 1 or 2) + (j referring to the omic dataset number, between 1 and J) Parameters ---------- @@ -204,7 +199,7 @@ def _computeF( K_gp, details=False ): - """Compute the value of F given omic1, omic2, Y, W, H1, H2, Beta1, Beta2 + """Compute the value of F given Y, W, each omicj, Hj, Betaj and some hyperparameters (i.e. gamma, lambda and mu). Calculate the objective function value, as well as each error term, @@ -212,23 +207,16 @@ def _computeF( Parameters ---------- - :omic1: ndarray of shape (n_samples x n_features_omic1), values of - features from (omic1) measured on (n_samples). - :omic2: ndarray of shape (n_samples x n_features_omic2), values of - features from (omic2) measured on the same (n_samples) samples - as (omic1). + :omics: list of (n_omics) ndarrays of shape (n_samples x n_features_omicj), values of + features from (omicj) measured on the same (n_samples) samples. :Y: ndarray of shape (n_samples x U), one-hot encoding of (y) indicating to which group each sample belongs. :W: ndarray of shape (n_samples x K), contributions of individuals to latent components. - :H1: ndarray of shape (K x n_features_omic1), latent components built on - (omic1). - :H2: ndarray of shape (K x n_features_omic2), latent components built on - (omic2). - :Beta1: ndarray of shape (K x 1), regression coefficients for projection - of individuals from (omic1) onto latent components from H^(1). - :Beta2: ndarray of shape (K x 1), regression coefficients for projection - of individuals from (omic2) onto latent components from H^(2). + :Hs: list of (n_omics) ndarrays of shape (K x n_features_omicj), latent components + built on (omicj). + :Betas: list of (n_omics) ndarrays of shape (K x 1), regression coefficients for projection + of individuals from (omicj) onto latent components from H^(j). :mu: float, value for parameter `mu` from objective function. :lambdA: float, value for parameter `lambda` from objective function. :gamma: float, value for parameter `gamma` from objective function. @@ -329,7 +317,7 @@ def _linesearch( """Find the most suited value for eta^(j). Calculate the optimal gradient descent step size eta^(j) to update H^(j) - (j = 1,2). + (j = 1,...,J). Instead of reuse each time the initial eta_0^(j), use the last one found, eta_(t-1)^(j). @@ -434,37 +422,28 @@ def _analytic_solver( Parameters ---------- - :omic1: ndarray of shape (n_samples x n_features_omic1), values of - features from (omic1) measured on (n_samples). - :omic2: ndarray of shape (n_samples x n_features_omic2), values of - features from (omic2) measured on the same (n_samples) samples as - (omic1). + :omics: list of (n_omics) ndarrays of shape (n_samples x n_features_omicj), values of + features from (omicj) measured on the same (n_samples) samples. :Y: ndarray of shape (n_samples x U), one-hot encoding of (y) indicating to which group each sample belongs. :W: ndarray of shape (n_samples x K), contributions of individuals to latent components, initialized or at (t). - :H1: ndarray of shape (K x n_features_omic1), latent components built on - (omic1), initialized or at (t). - :H2: ndarray of shape (K x n_features_omic2), latent components built on - (omic2), initialized or at (t). 
- :Beta1: ndarray of shape (K x 1), regression coefficients for projection - of individuals from (omic1) onto (H1), initialized or at (t). - :Beta2: ndarray of shape (K x 1), regression coefficients for projection - of individuals from (omic2) onto (H2), initialized or at (t). - :params: dict of length 8 (optional), values for parameters `gamma`, - `lambda`, `mu` from objective function, `eta1`, `eta2` for prox, and + :Hs: list of (n_omics) ndarrays of shape (K x n_features_omicj), latent components + built on (omicj), initialized or at (t). + :Betas: list of (n_omics) ndarrays of shape (K x 1), regression coefficients for projection + of individuals from (omicj) onto H^(j), initialized or at (t). + :params: dict of length 7 (optional), values for parameters `gamma`, + `lambda`, `mu` from objective function, `eta` for prox, and `alpha`, `sigma`, `m_back` for linesearch(). - By default, gamma = 0.005, lambda = 1e-3, mu = 1e-3, eta1 = eta2 = 1, + By default, gamma = 0.005, lambda = 1e-3, mu = 1e-3, eta = 1, alpha = 2, sigma = 1e-9 and m_back = 1. - :current_etas: DESCRIBE. + :current_etas: list of (n_omics) floats, value of parameter `etaj`. :as_sklearn: boolean, whether or not a modified version of the MU updates from scikit-learn is used. :backtrack: boolean, if Backtrack LineSearch is performed or not. :max_iter_back: int, maximum number of iterations for the backtrack. - :H1_loss: list, values of the marginal loss for H^(1). - :H2_loss: list, values of the marginal loss for H^(2). - :H1_grad: list, values of gradient in H^(1) (before update). - :H2_grad: list, values of gradient in H^(2) (before update). + :Hlosses: list of (n_omics) lists, values of the marginal loss for H^(j). + :Hgrads: list of (n_omics) lists, values of gradient in H^(j) (before update). :K: int (optional), number of latent components to build. By default, K = 2. :K_gp: int, number of components dedicated to groups profiling. @@ -474,19 +453,12 @@ Returns ------- :W_new: ndarray of shape (n_samples x K), updated, or (t+1), W matrix. - :H1_new: ndarray of shape (K x n_features_omic1), updated, or (t+1), - H1 matrix. - :H2_new: ndarray of shape (K x n_features_omic2), updated, or (t+1), - H2 matrix. - :Beta1_new: vector of length (K), updated, or (t+1), Beta1 vector. - :Beta2_new: vector of length (K), updated, or (t+1), Beta2 vector. - :params[`eta1`]: float, updated value of parameter `eta1`. - :params[`eta2`]: float, updated value of parameter `eta2`. - :H1_loss: list, augmented list of marginal losses for H^(1). - :H2_loss: list, augmented list of marginal losses for H^(2). - :H1_grad: list, augmented list of gradient values in H^(1) - (before update). - :H2_grad: list, augmented list of gradient values in H^(2) + :Hs_new: list of (n_omics) ndarrays of shape (K x n_features_omicj), updated, or (t+1), + Hj matrices. + :Betas_new: list of (n_omics) vectors of length (K), updated, or (t+1), Betaj vectors. + :current_etas: list of (n_omics) floats, updated value of parameter `etaj`. + :Hlosses: list of (n_omics) lists, augmented list of marginal losses for H^(j). + :Hgrads: list of (n_omics) lists, augmented list of gradient values in H^(j) (before update). Note @@ -597,48 +569,42 @@ class NMFProfiler: The goal of the method is to find relationships between OMICS corresponding to typical profiles of distinct groups of individuals.
The - objective is to find two decompositions, one for each omic, with a common + objective is to find one decomposition for each omic, with a common contribution of individuals, in which latent factor matrices are sparse. The objective function :math:`\mathcal{F} - (\mathbf{W},\mathbf{H}^{(1)},\mathbf{H}^{(2)},\beta^{(1)},\beta^{(2)})` + (\mathbf{W},\mathbf{H}^{(1)},\ldots,\mathbf{H}^{(J)},\beta^{(1)},\ldots,\beta^{(J)})` is as follows: .. math:: - & \dfrac{1}{2}\left( \sum_{j=1}^2\| \mathbf{X}^{(j)} - + & \dfrac{1}{2}\left( \sum_{j=1}^J\| \mathbf{X}^{(j)} - \mathbf{WH}^{(j)} \|_F^2 \right) - &+ \dfrac{\gamma}{2}\left( \sum_{j=1}^2\| \mathbf{Y} - + &+ \dfrac{\gamma}{2}\left( \sum_{j=1}^J\| \mathbf{Y} - \mathbf{X}^{(j)} \mathbf{H}^{(j)\top} \text{Diag}(\beta^{(j)}) \|_F^2 \right) - &+ \sum_{j=1}^{2} \lambda\|\mathbf{H}^{(j)}\|_1 + + &+ \sum_{j=1}^{J} \lambda\|\mathbf{H}^{(j)}\|_1 + \dfrac{\mu}{2}\|\mathbf{W}\|_F^2 Parameters ---------- - :omic1: array-like of shape (n_samples x n_features_omic1). - First omics dataset. (n_samples) is the number of samples and - (n_features_omic1) the number of features. - - :omic2: array-like of shape (n_samples x n_features_omic2). - Second omics dataset. (n_samples) is the number of samples and - (n_features_omic2) is the number of features. - WARNING: (omic2) must contain the exact same samples in the same order - as (omic1). + :omics: list of (n_omics) array-like of shape (n_samples x n_features_omicj). + Omics datasets. (n_samples) is the number of samples and + (n_features_omicj) the number of features of (omicj). + WARNING: each (omicj) must contain the exact same samples in the same order. :y: vector of length (n_samples). - Group to which each sample belongs (same order than the rows of omic1 - and omic2). + Group to which each sample belongs (same order as the rows of omicj). - :params: dict of length 8, optional. + :params: dict of length 7, optional. Contains, in this order, values for hyperparameters `gamma`, `lambda`, - `mu` (from the objective function), for `eta1`, `eta2` (when proximal + `mu` (from the objective function), for `eta` (when proximal optimization is used), and for `alpha`, `sigma`, `m_back` (for `linesearch()`). - By default, gamma = 1e-2, lambda = 1e-3, mu = 1e-3, eta1 = eta2 = 1, + By default, gamma = 1e-2, lambda = 1e-3, mu = 1e-3, eta = 1, alpha = 2, sigma = 1e-9, and m_back = 1. In the objective function, `lambda` and `gamma` are additionally multiplied by (n_samples). @@ -694,44 +660,31 @@ class NMFProfiler: Attributes ---------- - :W: ndarray of shape (n_samples x 2). + :W: ndarray of shape (n_samples x K). Contributions of individuals in each latent component. - :W_init: ndarray of shape (n_samples x 2). + :W_init: ndarray of shape (n_samples x K). Initial version of (W). - :H1: ndarray of shape (2 x n_features_omic1). - Latent components for (omic1). - - :H1_init: ndarray of shape (2 x n_features_omic1). - Initial version of (H1). - - :H2: ndarray of shape (2 x n_features_omic2). - Latent components for (omic2). - - :H2_init: ndarray of shape (2 x n_features_omic2). - Initial version of (H2). - - :Beta1: ndarray of shape (2 x 1). - Regression coefficients for the projection of individuals from (omic1) - onto (H1). + :Hs: list of (n_omics) ndarrays of shape (K x n_features_omicj). + Latent components Hj for (omicj). - :Beta1_init: ndarray of shape (2 x 1). - Initial version of (Beta1). + :Hs_init: list of (n_omics) ndarrays of shape (K x n_features_omicj). + Initial version of (Hj).
- :Beta2: ndarray of shape (K x 1). - Regression coefficients for the projection of individuals from (omic2) - onto (H2). + :Betas: list of (n_omics) ndarrays of shape (K x 1). + Regression coefficients for the projection of individuals from (omicj) + onto (Hj). - :Beta2_init: ndarray of shape (K x 1). - Initial version of (Beta2). + :Betas_init: list of (n_omics) ndarrays of shape (K x 1). + Initial version of (Betaj). :n_iter: int. Final number of iterations (up to convergence or maximum number of iterations is reached). - :df_etas: `pd.dataFrame` of shape (n_iter+1, 2). - Optimal values for parameters `eta1` and `eta2` at each iteration. + :df_etas: `pd.dataFrame` of shape (n_iter+1, n_omics). + Optimal values for parameter `eta` at each iteration. :df_errors: `pd.dataFrame` of shape (n_iter+1, 9). All error terms for each iteration and omic j. :df_ldaperf: `pd.DataFrame` of shape (n_iter+1, 13). All metrics linked to LDA at each iteration and omic j. - :df_grads: `pd.DataFrame` of shape (n_iter+1, 2) - Values of H^(1) and H^(2) gradients before being updated, at each + :df_grads: `pd.DataFrame` of shape (n_iter+1, n_omics) + Values of H^(j) gradients before being updated, at each iteration. :runningtime: float. @@ -791,7 +744,7 @@ class NMFProfiler: >>> y = np.array([1, 1, 1, 0, 0, 0]) >>> seed = 240805 >>> from nmfprofiler import NMFProfiler - >>> model = NMFProfiler(omic1=X1, omic2=X2, y=y, seed=seed) + >>> model = NMFProfiler(omics=[X1, X2], y=y, seed=seed) >>> res = model.fit() >>> print(res) >>> res.heatmap(obj_to_viz="W", height=10, width=10, path="") @@ -884,8 +837,8 @@ class NMFProfiler: def _initialize_w_h_beta(self): """Initialize matrices W, H^j and Beta^j. - Several ways to intialize W, H1, H2. - Beta1 and Beta2 are initialized with 1s vectors. + Several ways to initialize W, Hj. + Betaj are initialized with 1s vectors. Note ---- @@ -898,7 +851,7 @@ class NMFProfiler: # dedicated to groups, identically to K K_gp = len(np.unique(self.y)) - # For W, H1 and H2: + # For W and Hj: # 1.0. Initialize H0s list H0s = [0 for j in range(len(self.omics))] # 1.1. Concatenate omics data sets omics = np.concatenate(self.omics, axis=1) @@ -923,7 +876,7 @@ class NMFProfiler: del _ elif self.init_method == "random3": # 1.2b. FOR IDENTICAL OMICS DATA SETS - Initialize W - # with both omics, H1 with omic1 and H2 as H1 (random) + # with both omics, H1 with omic1 and other Hj as H1 (random) W0, H0s[0] = _initialize_nmf( X=self.omics[0], n_components=K, init="random", random_state=self.seed ) @@ -1326,7 +1327,7 @@ class NMFProfiler: Params ------ :new_ind: list. List of arrays containing values of - features from omic1 and omic2 for a new sample. + features from omicj for a new sample. Values ------ -- GitLab From 2e4af3828d07b6154820940f13a7d0cedeed940d Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Fri, 29 Nov 2024 10:43:59 +0100 Subject: [PATCH 17/19] linter corrections --- nmfprofiler/nmfprofiler.py | 60 ++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index bdf781e..83a4ef5 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -47,10 +47,11 @@ def _update_W(W, omics, Hs, mu): ---------- :W: ndarray of shape (n_samples x K), contributions of individuals to latent components. - :omics: list of (n_omics) ndarrays of shape (n_samples x n_features_omicj), values of - features from (omicj) measured on the same (n_samples) samples.
- :Hs: list of (n_omics) ndarrays of shape (K x n_features_omicj), latent components - built on (omicj). + :omics: list of (n_omics) ndarrays of shape (n_samples x n_features_omicj), + values of features from (omicj) measured on the same + (n_samples) samples. + :Hs: list of (n_omics) ndarrays of shape (K x n_features_omicj), + latent components built on (omicj). :mu: float, value for parameter `mu` from objective function. @@ -207,16 +208,18 @@ def _computeF( Parameters ---------- - :omics: list of (n_omics) ndarrays of shape (n_samples x n_features_omicj), values of - features from (omicj) measured on the same (n_samples) samples. + :omics: list of (n_omics) ndarrays of shape (n_samples x n_features_omicj), + values of features from (omicj) measured on the same + (n_samples) samples. :Y: ndarray of shape (n_samples x U), one-hot encoding of (y) indicating to which group each sample belongs. :W: ndarray of shape (n_samples x K), contributions of individuals to latent components. - :Hs: list of (n_omics) ndarrays of shape (K x n_features_omicj), latent components - built on (omicj). - :Betas: list of (n_omics) ndarrays of shape (K x 1), regression coefficients for projection - of individuals from (omicj) - onto latent components from H^(j). + :Hs: list of (n_omics) ndarrays of shape (K x n_features_omicj), + latent components built on (omicj). + :Betas: list of (n_omics) ndarrays of shape (K x 1), regression + coefficients for projection of individuals from (omicj) + onto latent components from H^(j). :mu: float, value for parameter `mu` from objective function. :lambdA: float, value for parameter `lambda` from objective function. :gamma: float, value for parameter `gamma` from objective function. @@ -422,16 +425,18 @@ def _analytic_solver( Parameters ---------- - :omics: list of (n_omics) ndarrays of shape (n_samples x n_features_omicj), values of - features from (omicj) measured on the same (n_samples) samples. + :omics: list of (n_omics) ndarrays of shape (n_samples x n_features_omicj), + values of features from (omicj) measured on the same + (n_samples) samples. :Y: ndarray of shape (n_samples x U), one-hot encoding of (y) indicating to which group each sample belongs. :W: ndarray of shape (n_samples x K), contributions of individuals to latent components, initialized or at (t). - :Hs: list of (n_omics) ndarrays of shape (K x n_features_omicj), latent components - built on (omicj), initialized or at (t). - :Betas: list of (n_omics) ndarrays of shape (K x 1), regression coefficients for projection - of individuals from (omicj) onto H^(j), initialized or at (t). + :Hs: list of (n_omics) ndarrays of shape (K x n_features_omicj), + latent components built on (omicj), initialized or at (t). + :Betas: list of (n_omics) ndarrays of shape (K x 1), regression + coefficients for projection of individuals from (omicj) + onto H^(j), initialized or at (t). :params: dict of length 7 (optional), values for parameters `gamma`, `lambda`, `mu` from objective function, `eta` for prox, and `alpha`, `sigma`, `m_back` for linesearch(). By default, gamma = 0.005, lambda = 1e-3, mu = 1e-3, eta = 1, alpha = 2, sigma = 1e-9 and m_back = 1. :current_etas: list of (n_omics) floats, value of parameter `etaj`. :as_sklearn: boolean, whether or not a modified version of the MU updates from scikit-learn is used. :backtrack: boolean, if Backtrack LineSearch is performed or not. :max_iter_back: int, maximum number of iterations for the backtrack. :Hlosses: list of (n_omics) lists, values of the marginal loss for H^(j). - :Hgrads: list of (n_omics) lists, values of gradient in H^(j) (before update). + :Hgrads: list of (n_omics) lists, values of gradient in H^(j) + (before update). :K: int (optional), number of latent components to build.
By default, K = 2. :K_gp: int, number of components dedicated to groups profiling. @@ -453,13 +459,15 @@ Returns ------- :W_new: ndarray of shape (n_samples x K), updated, or (t+1), W matrix. - :Hs_new: list of (n_omics) ndarrays of shape (K x n_features_omicj), updated, or (t+1), - Hj matrices. - :Betas_new: list of (n_omics) vectors of length (K), updated, or (t+1), Betaj vectors. + :Hs_new: list of (n_omics) ndarrays of shape (K x n_features_omicj), + updated, or (t+1), Hj matrices. + :Betas_new: list of (n_omics) vectors of length (K), updated, or (t+1), + Betaj vectors. :current_etas: list of (n_omics) floats, updated value of parameter `etaj`. - :Hlosses: list of (n_omics) lists, augmented list of marginal losses for H^(j). - :Hgrads: list of (n_omics) lists, augmented list of gradient values in H^(j) - (before update). + :Hlosses: list of (n_omics) lists, augmented list of + marginal losses for H^(j). + :Hgrads: list of (n_omics) lists, augmented list of + gradient values in H^(j) (before update). Note @@ -591,10 +599,12 @@ class NMFProfiler: Parameters ---------- - :omics: list of (n_omics) array-like of shape (n_samples x n_features_omicj). + :omics: list of (n_omics) array-like of shape + (n_samples x n_features_omicj). Omics datasets. (n_samples) is the number of samples and (n_features_omicj) the number of features of (omicj). - WARNING: each (omicj) must contain the exact same samples in the same order. + WARNING: each (omicj) must contain the exact same samples + in the same order. :y: vector of length (n_samples). Group to which each sample belongs (same order as the rows of omicj). -- GitLab From 19001147d865fb40f978585128031b7cf3f6cce7 Mon Sep 17 00:00:00 2001 From: Aurelie Mercadie <aurelie.mercadie@inrae.fr> Date: Mon, 9 Dec 2024 15:17:22 +0100 Subject: [PATCH 18/19] added y levels transformation into integers (fixed #9) --- nmfprofiler/nmfprofiler.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/nmfprofiler/nmfprofiler.py b/nmfprofiler/nmfprofiler.py index 83a4ef5..ec87ed3 100644 --- a/nmfprofiler/nmfprofiler.py +++ b/nmfprofiler/nmfprofiler.py @@ -706,6 +706,10 @@ class NMFProfiler: Values of H^(j) gradients before being updated, at each iteration. + :df_y: `pd.DataFrame` of shape (n, 2) + Original levels in :y: as well as their associated + integers. + :runningtime: float. Running time of the method measured through `process_time()`.
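The hunk below implements this recoding with an explicit loop over the sorted levels; for intuition, the same mapping can be sketched with plain `numpy` (an illustration only, assuming levels are assigned integers in sorted order, which is what the loop below does):

```python
import numpy as np

y = np.array(["ctrl", "case", "ctrl", "case"])
levels, y_coded = np.unique(y, return_inverse=True)
# levels[k] is the original label recoded as integer k, i.e. the two
# columns later stored side by side in df_y: {'case': 0, 'ctrl': 1}
print(dict(zip(levels, range(len(levels)))), y_coded)
```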
@@ -970,7 +974,22 @@ class NMFProfiler: # dedicated to groups, identically to K K_gp = len(np.unique(self.y)) - # Automatically convert y into a one-hot encoded matrix Y + # Turn y levels into integers + if self.verbose: + print("\nConverting y levels into integers...") + print("Original levels: ", np.unique(self.y)) + y_init = deepcopy(self.y) + num_levels = {} + idx = 0 + for level in np.unique(self.y): + if level not in num_levels: + num_levels[level] = idx + idx += 1 + self.y = np.array([num_levels[level] for level in self.y]) + if self.verbose: + print("New levels: ", np.unique(self.y), "\n") + + # And automatically convert y into a one-hot encoded matrix Y encoder = OneHotEncoder(handle_unknown="ignore") Y = encoder.fit_transform(self.y.reshape(-1, 1)).toarray() @@ -1310,6 +1329,14 @@ class NMFProfiler: columns=[f"eta_omic{j+1}" for j in range(len(self.omics))] ) + # Build the correspondence matrix between + # original levels and their associated integer in y + # ------------------------------------------------- + self.df_y = pd.DataFrame( + {"Original_y": y_init, + "Recoded_y": self.y} + ) + return self def predict(self, new_ind, verbose=False): -- GitLab From 20a91a141c72c2c375b38e66d14b48f384293357 Mon Sep 17 00:00:00 2001 From: Nathalie Vialaneix <nathalie.vialaneix@inrae.fr> Date: Wed, 18 Dec 2024 08:13:06 +0100 Subject: [PATCH 19/19] bumped version number and date --- codemeta.json | 4 ++-- docs/source/conf.py | 2 +- pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/codemeta.json b/codemeta.json index 030de6a..1efac7b 100644 --- a/codemeta.json +++ b/codemeta.json @@ -5,9 +5,9 @@ "codeRepository": "https://forgemia.inra.fr/omics-integration/nmfprofiler", "dateCreated": "2024-06-24", "datePublished": "2024-08-22", - "dateModified": "2024-08-23", + "dateModified": "2024-12-18", "name": "NMFProfiler", - "version": "0.2.1", + "version": "0.3.0", "description": "NMFProfiler: an integrative supervised Non-Negative Matrix Factorization to extract typical profiles of groups of interest combining two different datasets.", "applicationCategory": "Biostatistics", "funding": "2022/0051", diff --git a/docs/source/conf.py b/docs/source/conf.py index cead502..546b33a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -21,7 +21,7 @@ sys.path.insert(0, os.path.abspath("../..")) project = "NMFProfiler" copyright = "2024, Aurélie Mercadié" author = "Aurélie Mercadié" -release = "0.2.1" +release = "0.3.0" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/pyproject.toml b/pyproject.toml index ae29d4e..7ed7b82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ build-backend = "setuptools.build_meta" [project] name = "NMFProfiler" -version = "0.2.1" +version = "0.3.0" description = "NMFProfiler: an integrative supervised Non-Negative Matrix Factorization to extract typical profiles of groups of interest combining two different datasets." readme = "README.md" authors = [ -- GitLab
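Taken together, these patches replace the fixed `omic1`/`omic2` pair with a list-valued `omics` argument, so any number of datasets can be analyzed jointly. A minimal end-to-end sketch of the resulting API on three synthetic datasets (shapes and values are invented for illustration; the call signature and attributes follow the README and docstring changes above):

```python
import numpy as np
from nmfprofiler import NMFProfiler

rng = np.random.default_rng(240709)
n = 12
X1 = rng.random((n, 30))             # omic 1: 30 features
X2 = rng.random((n, 50))             # omic 2: 50 features
X3 = rng.random((n, 20))             # omic 3: a third dataset, now supported
y = np.array(["A"] * 6 + ["B"] * 6)  # string labels, recoded internally (see df_y)

model = NMFProfiler(omics=[X1, X2, X3], y=y, seed=240709)
res = model.fit()

print(res.df_y.drop_duplicates())    # original label -> integer mapping
print(len(res.Hs), len(res.Betas))   # one Hj and one Betaj per omic
res.heatmap(obj_to_viz="H", width=15, height=6, path="", omic_number=3)
```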