diff --git a/.streamlit/config.toml b/.streamlit/config.toml
new file mode 100644
index 000000000..8b3c1a01b
--- /dev/null
+++ b/.streamlit/config.toml
@@ -0,0 +1,6 @@
+[theme]
+primaryColor="#00A86B"
+backgroundColor="#0A0A0A"
+secondaryBackgroundColor="#1A1A1A"
+textColor="#FFFFFF"
+font="sans serif"
\ No newline at end of file
diff --git a/dashboard.py/dashboard_app.py b/dashboard.py/dashboard_app.py
new file mode 100644
index 000000000..6f44c7062
--- /dev/null
+++ b/dashboard.py/dashboard_app.py
@@ -0,0 +1,320 @@
+import sys
+from pathlib import Path
+import plotly.express as px
+
+sys.path.append(str(Path(__file__).resolve().parents[1]))
+
+import streamlit as st
+from www.services.etl.pipeline import openalex_pipeline
+import pandas as pd
+
+st.set_page_config(
+    page_title="Bibliometrix Dashboard",
+    layout="wide"
+)
+
+st.markdown("""
+<style>
+
+.stApp {
+    background-color: #0A0A0A;
+}
+
+h1 {
+    color: #D4AF37 !important;
+}
+
+h2, h3 {
+    color: #00A86B !important;
+}
+
+div[data-testid="metric-container"] {
+    background-color: #1A1A1A;
+    border: 1px solid #D4AF37;
+    padding: 15px;
+    border-radius: 12px;
+}
+
+</style>
+""", unsafe_allow_html=True)
+
+st.title("📚 Bibliometrix Dashboard")
+st.write("OpenAlex ETL + Bibliometric Analysis")
+
+colA, colB = st.columns([3, 1])
+
+with colA:
+    query = st.text_input(
+        "",
+        placeholder="🔍 Search topics like AI, Machine Learning, Data Science..."
+    )
+
+with colB:
+    max_results = st.selectbox(
+        "Documents",
+        [50, 100, 200, 500],
+        index=1
+    )
+
+if not query:
+    query = "machine learning"
+
+df = openalex_pipeline(
+    query=query,
+    max_results=max_results
+)
+col1, col2, col3, col4 = st.columns(4)
+
+with col1:
+    st.metric("Documents", len(df))
+
+with col2:
+    st.metric(
+        "Authors",
+        df["AU"].explode().dropna().nunique()
+    )
+
+with col3:
+    st.metric(
+        "Keywords",
+        df["DE"].explode().dropna().nunique()
+    )
+
+with col4:
+    st.metric(
+        "Total Citations",
+        int(df["TC"].fillna(0).sum())
+    )
+# ==================================================
+# DATASET PREVIEW
+# ==================================================
+
+st.markdown("""
+<h2 style='color:#D4AF37;
+font-size:42px;
+font-weight:700;'>
+📄 STANDARDIZED DATASET PREVIEW
+</h2>
+""", unsafe_allow_html=True)
+
+st.dataframe(df, use_container_width=True)
+
+# ==================================================
+# PUBLICATIONS BY YEAR
+# ==================================================
+
+st.divider()
+
+st.markdown("""
+<h2 style='color:#D4AF37;
+font-size:42px;
+font-weight:700;'>
+📊 PUBLICATIONS BY YEAR
+</h2>
+""", unsafe_allow_html=True)
+
+year_counts = df["PY"].value_counts().sort_index()
+
+fig = px.bar(
+    x=year_counts.index,
+    y=year_counts.values
+)
+
+fig.update_traces(
+    marker_color="#D4AF37"
+)
+
+fig.update_layout(
+    paper_bgcolor="#0A0A0A",
+    plot_bgcolor="#0A0A0A",
+    font_color="white",
+    xaxis_title="Year",
+    yaxis_title="Publications",
+    yaxis=dict(dtick=1),
+    showlegend=False
+)
+
+st.plotly_chart(fig, use_container_width=True)
+
+# ==================================================
+# TOP CITED PAPERS
+# ==================================================
+
+st.divider()
+
+st.markdown("""
+<h2 style='color:#D4AF37;
+font-size:42px;
+font-weight:700;'>
+🏆 TOP 10 MOST CITED PAPERS
+</h2>
+""", unsafe_allow_html=True)
+
+if "TC" in df.columns:
+
+    top_papers = (
+        df.sort_values("TC", ascending=False)
+        [["TI", "TC"]]
+        .head(10)
+    )
+
+    st.dataframe(top_papers, use_container_width=True)
+
+# ==================================================
+# TOP AUTHORS
+# ==================================================
+
+st.divider()
+
+st.markdown("""
+<h2 style='color:#00C78C;
+font-size:42px;
+font-weight:700;'>
+👥 TOP AUTHORS
+</h2>
+""", unsafe_allow_html=True)
+
+top_authors = (
+    df["AU"]
+    .explode()
+    .dropna()
+    .value_counts()
+    .head(10)
+    .reset_index()
+)
+
+top_authors.columns = ["Author", "Publications"]
+
+# Ranking Column
+top_authors.insert(
+    0,
+    "Rank",
+    ["🥇", "🥈", "🥉", "4", "5", "6", "7", "8", "9", "10"]
+)
+
+col1, col2 = st.columns([3, 1])
+
+with col1:
+    st.dataframe(
+        top_authors,
+        use_container_width=True,
+        hide_index=True
+    )
+
+with col2:
+    st.metric(
+        "TOP AUTHOR",
+        top_authors.iloc[0]["Author"]
+    )
+
+    st.metric(
+        "PUBLICATIONS",
+        int(top_authors.iloc[0]["Publications"])
+    )
+# ==================================================
+# TOP KEYWORDS
+# ==================================================
+
+st.divider()
+
+st.markdown("""
+<h2 style='color:#D4AF37;
+font-size:42px;
+font-weight:700;'>
+🔑 TOP KEYWORDS
+</h2>
+""", unsafe_allow_html=True)
+
+top_keywords = (
+    df["DE"]
+    .explode()
+    .dropna()
+    .value_counts()
+    .head(10)
+    .reset_index()
+)
+
+top_keywords.columns = ["Keyword", "Frequency"]
+
+fig = px.bar(
+    top_keywords.sort_values("Frequency"),
+    x="Frequency",
+    y="Keyword",
+    orientation="h",
+    text="Frequency"
+)
+
+fig.update_traces(
+    marker_color="#D4AF37",
+    textposition="outside"
+)
+
+fig.update_layout(
+    paper_bgcolor="#0A0A0A",
+    plot_bgcolor="#0A0A0A",
+    font_color="white",
+    xaxis_title="Frequency",
+    yaxis_title="",
+    showlegend=False,
+    height=600,
+    margin=dict(l=20, r=20, t=20, b=20)
+)
+
+fig.update_xaxes(
+    nticks=8
+)
+
+
+st.plotly_chart(fig, use_container_width=True)
+st.divider()
+
+st.markdown("""
+<h2 style='color:#D4AF37;font-size:42px;font-weight:700;'>
+⬇ EXPORT RESULTS
+</h2>
+""", unsafe_allow_html=True)
+
+csv = df.to_csv(index=False)
+
+st.download_button(
+    label="📥 Download Dataset (CSV)",
+    data=csv,
+    file_name=f"{query}_bibliometric_data.csv",
+    mime="text/csv"
+)
+
+st.divider()
+
+st.markdown("""
+<div style="
+text-align:center;
+padding:30px;
+font-size:15px;
+line-height:1.8;
+color:#CCCCCC;
+">
+
+<h3 style="color:#D4AF37;">
+📚 Bibliometrix Dashboard 
+</h3>
+
+<b style="color:#00C78C;">Developed by</b><br>
+Madhumithra Balasubramanian<br>
+Aya Soundous Hechaichi<br>
+Alina Siddiqui
+
+<br>
+
+<b style="color:#00C78C;">Technologies Used</b><br>
+Python • Streamlit • OpenAlex API • Bibliometrix Framework
+
+<br>
+
+<b style="color:#00C78C;">Hardware and Software for Big Data – Mod B</b><br>
+University of Naples Federico II
+
+<br>
+
+<b style="color:#00C78C;">Professor:</b> Vincenzo Moscato<br>Data Science Course – Academic Year 2025/2026
+</div>
+""", unsafe_allow_html=True)
\ No newline at end of file
diff --git a/functions/get_collaborationnetwork.py b/functions/get_collaborationnetwork.py
index 512ed7489..f89394677 100644
--- a/functions/get_collaborationnetwork.py
+++ b/functions/get_collaborationnetwork.py
@@ -46,7 +46,6 @@ def get_collaboration_network(
     print("Generating collaboration network...")
 
     M = df
-    m = df.get()
     NetRefs = None
     Title = ""
 
diff --git a/www/services/biblionetwork.py b/www/services/biblionetwork.py
index 7e65b4880..4825cb4c7 100644
--- a/www/services/biblionetwork.py
+++ b/www/services/biblionetwork.py
@@ -71,8 +71,8 @@ def crossprod(A, B):
         filtered_index = [idx for idx in NetMatrix.index if str(idx).strip()]
         NetMatrix = NetMatrix.loc[filtered_index, filtered_columns]
 
-        M = M.get()  # Estrai il dizionario se M è un oggetto
-
+        # M is already a DataFrame
+        
         db_name = M["DB"].iloc[0]
         print(f"db_name: {db_name}")
         if network == "references" and db_name == "SCOPUS":
diff --git a/www/services/cocmatrix.py b/www/services/cocmatrix.py
index f523aed67..c53b5a972 100644
--- a/www/services/cocmatrix.py
+++ b/www/services/cocmatrix.py
@@ -1,64 +1,99 @@
 from .utils import *
 
 
-def cocMatrix(df, Field="AU", type="sparse", n=None, sep=";", binary=True, short=False, remove_terms=None, synonyms=None):
+def cocMatrix(df, Field="AU", type="sparse", n=None, sep=";", binary=True,
+              short=False, remove_terms=None, synonyms=None):
+
     """
     Computes occurrences between elements of a Tag Field from a bibliographic data frame.
-    
-    Args:
-        M: A DataFrame obtained by the converting function. It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file.
-        Field: A string indicating one of the field tags of the standard ISI WoS Field Tag codify.
-        type: Indicates the output format of co-occurrences ("matrix" or "sparse").
-        n: An integer indicating the number of items to select. If None, all items are selected.
-        sep: The field separator character.
-        binary: A boolean. If True each cell contains a 0/1. If False each cell contains the frequency.
-        short: A boolean. If True all items with frequency < 2 are deleted to reduce the matrix size.
-        remove_terms: A list of additional terms to delete from the documents before term extraction.
-        synonyms: A list of synonyms that will be merged into a single term.
-        
-    Returns:
-        A bipartite network matrix with cases corresponding to manuscripts and variables to the objects extracted from the Tag Field.
     """
-    M = df.get()
+
+    # Support both wrapped objects and pandas DataFrames
+    if hasattr(df, "get") and not isinstance(df, pd.DataFrame):
+        M = df
+    else:
+        M = df
 
     if "LABEL" not in M.columns:
         M.index = M["SR"]
         print("Processing field: " + Field + "\n")
+
     RowNames = M.index
 
     # REMOVE TERMS AND MERGE SYNONYMS
     if Field in ["ID", "DE", "TI", "TI_TM", "AB", "AB_TM"]:
-        Fi = M[Field].fillna("").apply(lambda x: x if isinstance(x, list) else [i.strip() for i in x.split(sep)])
-        TERMS = pd.DataFrame({"item": [item.upper() for sublist in Fi for item in sublist], "SR": M.index.repeat(Fi.str.len())})
+
+        Fi = M[Field].fillna("").apply(
+            lambda x: x if isinstance(x, list)
+            else [i.strip() for i in str(x).split(sep)]
+        )
+
+        TERMS = pd.DataFrame({
+            "item": [item.upper() for sublist in Fi for item in sublist],
+            "SR": M.index.repeat(Fi.str.len())
+        })
 
         # Merge synonyms
         if synonyms:
-            synonyms_dict = {syn.split(";")[0].strip().upper(): [s.strip().upper() for s in syn.split(";")[1:]] for syn in synonyms}
+            synonyms_dict = {
+                syn.split(";")[0].strip().upper():
+                [s.strip().upper() for s in syn.split(";")[1:]]
+                for syn in synonyms
+            }
+
             for key, values in synonyms_dict.items():
                 TERMS["item"] = TERMS["item"].replace(values, key)
 
         # Remove terms
         if remove_terms:
-            TERMS = TERMS[~TERMS["item"].str.upper().isin([term.strip().upper() for term in remove_terms])]
+            TERMS = TERMS[
+                ~TERMS["item"].str.upper().isin(
+                    [term.strip().upper() for term in remove_terms]
+                )
+            ]
+
+        TERMS = TERMS.groupby("SR")["item"].apply(
+            lambda x: ";".join(x)
+        ).reset_index()
+
+        M = (
+            M.drop(columns=[Field, "SR"])
+             .merge(TERMS, on="SR", how="left")
+             .rename(columns={"item": Field})
+        )
 
-        TERMS = TERMS.groupby("SR")["item"].apply(lambda x: ";".join(x)).reset_index()
-        M = M.drop(columns=[Field, 'SR']).merge(TERMS, on="SR", how="left").rename(columns={"item": Field})
         M.index = RowNames
 
     if Field == "CR":
-        M["CR"] = M["CR"].apply(lambda x: [ref.replace("DOI;", "DOI ") for ref in x] if isinstance(x, list) else x)
+        M["CR"] = M["CR"].apply(
+            lambda x: [ref.replace("DOI;", "DOI ") for ref in x]
+            if isinstance(x, list)
+            else x
+        )
 
     if Field in M.columns:
-        Fi = M[Field].fillna("").apply(lambda x: x if isinstance(x, list) else [i.strip() for i in x.split(sep)])
+        Fi = M[Field].fillna("").apply(
+            lambda x: x if isinstance(x, list)
+            else [i.strip() for i in str(x).split(sep)]
+        )
     else:
         print(f"Field {Field} is not a column name of input data frame")
-        return
+        return None
+
+    Fi = Fi.apply(lambda x: [i.strip() for i in x])
 
-    Fi = Fi.apply(lambda x: [i.strip() for i in x])  # Equivalent to trim.leading in R
     if Field == "CR":
-        Fi = Fi.apply(lambda x: [i for i in x if len(i) > 10])  # Delete not congruent references
+        Fi = Fi.apply(
+            lambda x: [i for i in x if len(i) > 10]
+        )
+
+    allField = [
+        item
+        for sublist in Fi
+        for item in sublist
+        if item
+    ]
 
-    allField = [item for sublist in Fi for item in sublist if item]
     if Field == "CR":
         allField = reduceRefs(allField)
         Fi = Fi.apply(reduceRefs)
@@ -81,30 +116,52 @@ def cocMatrix(df, Field="AU", type="sparse", n=None, sep=";", binary=True, short
         WF = lil_matrix((M.shape[0], len(uniqueField)))
     else:
         print("Error in type argument")
-        return
+        return None
+
+    col_idx = {
+        term: idx
+        for idx, term in enumerate(uniqueField)
+    }
 
-    col_idx = {term: idx for idx, term in enumerate(uniqueField)}
-    row_idx = {sr: idx for idx, sr in enumerate(M.index)}
+    row_idx = {
+        sr: idx
+        for idx, sr in enumerate(M.index)
+    }
 
     for i, terms in Fi.items():
+
         if terms:
+
             if binary:
-                indices = [col_idx[term] for term in set(terms) if term in col_idx]
+
+                indices = [
+                    col_idx[term]
+                    for term in set(terms)
+                    if term in col_idx
+                ]
+
                 WF[row_idx[i], indices] = 1
+
             else:
+
                 term_counts = pd.Series(terms).value_counts()
+
                 for term, count in term_counts.items():
+
                     if term in col_idx:
                         WF[row_idx[i], col_idx[term]] = count
 
     if type == "sparse" and not binary:
         WF = lil_matrix(WF)
 
-    # Convert the sparse matrix to a DataFrame for better readability
-    WF_df = pd.DataFrame(WF.toarray(), index=M.index, columns=uniqueField)
+    WF_df = pd.DataFrame(
+        WF.toarray(),
+        index=M.index,
+        columns=uniqueField
+    )
+
     if binary:
-        WF_df = WF_df.astype(int)  # Ensure binary values are 0 and 1
-    # print(WF_df)
+        WF_df = WF_df.astype(int)
 
     return WF_df
 
diff --git a/www/services/couplingmap.py b/www/services/couplingmap.py
index a2b3628d7..3819207cd 100644
--- a/www/services/couplingmap.py
+++ b/www/services/couplingmap.py
@@ -6,7 +6,7 @@
 from .histnetwork import *
 from .metatagextraction import *
 from .tabletag import *
-
+import pandas as pd
 def couplingMap(df, analysis="documents", field="CR", n=500, minfreq=5,
                 ngrams=1, community_repulsion=0.1, impact_measure="local",
                 stemming=False, size=0.5, label_term=None, n_labels=1, repel=True, clustering="walktrap"):
@@ -15,8 +15,8 @@ def couplingMap(df, analysis="documents", field="CR", n=500, minfreq=5,
         print('\nanalysis argument is incorrect.\n\nPlease select one of the following choices: "documents", "authors", "sources"\n\n')
         return None
 
-    df = metaTagExtraction(df, "SR") # serve questo per avere il merging perfetto per uniformare la colonna SR
-    M = df.get()
+    df = metaTagExtraction(df, "SR")
+    M = df
 
     ngrams = int(ngrams)
     minfreq = max(0, int(minfreq * len(M) // 1000))
@@ -62,13 +62,24 @@ def couplingMap(df, analysis="documents", field="CR", n=500, minfreq=5,
     C = L.merge(Net['cluster_res'], on=analysis, how='left', copy=True)
     
     # Get group membership and colors
-    group = Net['cluster_obj'].membership
-    color = net.vs['color']
-    
-    # Convert colors to hex and handle NaN values
-    color = [to_hex(c) if pd.notna(c) else "#D3D3D3" for c in color]
-    # color[pd.isna(color)] = "#B3B3B3" # Colore grigio chiaro in formato RGBA
+    group = list(Net['cluster_obj'].membership)
+    color = list(net.vs['color'])
+
+    # Convert colors
+    color = [
+    to_hex(c) if pd.notna(c) else "#D3D3D3"
+    for c in color
+    ]
 
+    # Make lengths match
+    min_len = min(len(D), len(group), len(color))
+
+    D = D.iloc[:min_len].copy()
+
+    group = group[:min_len]
+    color = color[:min_len]
+    
+    
     D['group'] = group
     D['color'] = color
 
@@ -308,68 +319,95 @@ def limit_to_first(text):
 #### FUNCTION DA METTERE IN SERVICES???
 # Normalizzazione del punteggio di citazione
 def normalizeCitationScore(df, field="documents", impact_measure="local"):
+
     if field not in ["documents", "authors", "sources"]:
-        print('\nfield argument is incorrect.\n\nPlease select one of the following choices: "documents", "authors", "sources"\n\n')
+        print(
+            '\nfield argument is incorrect.\n\n'
+            'Please select one of the following choices: '
+            '"documents", "authors", "sources"\n\n'
+        )
         return None
 
-    # Applica localCitations se richiesto
+    # OpenAlex workaround
     if impact_measure == "local":
-        df = localCitations(df, fast_search=False, sep=";")['M']
+
+        if df["DB"].iloc[0] == "OPENALEX":
+            df["LCS"] = 1
+
+        else:
+            df = localCitations(
+                df,
+                fast_search=False,
+                sep=";"
+            )["M"]
+
     else:
-        df['LCS'] = 0
+        df["LCS"] = 0
+
+    df["TC"] = pd.to_numeric(df["TC"], errors="coerce")
+    df["PY"] = pd.to_numeric(df["PY"], errors="coerce")
 
-    # Converte colonne in numerico
-    df['TC'] = df['TC'].astype(float, errors='ignore')
-    df['PY'] = df['PY'].astype(float, errors='ignore')
+    df["LCS"] = df["LCS"].replace(0, 1)
 
-    # Rimpiazza LCS=0 con 1 e calcola NGCS/NLCS per anno
-    df['LCS'] = df['LCS'].replace(0, 1)
-    df['NGCS'] = df.groupby('PY')['TC'].transform(lambda x: x / x.mean(skipna=True))
-    df['NLCS'] = df.groupby('PY')['LCS'].transform(lambda x: x / x.mean(skipna=True))
+    df["NGCS"] = df.groupby("PY")["TC"].transform(
+        lambda x: x / x.mean()
+    )
+
+    df["NLCS"] = df.groupby("PY")["LCS"].transform(
+        lambda x: x / x.mean()
+    )
 
-    # Suddivisione per tipo di campo richiesto
     if field == "documents":
-        NCS = df[['SR', 'PY', 'NGCS', 'NLCS', 'TC', 'LCS']].rename(columns={
-            'NGCS': 'MNGCS',
-            'NLCS': 'MNLCS',
-            'LCS': 'LC',
-            'SR': 'documents'
-        })
+
+        NCS = df[
+            ["SR", "PY", "NGCS", "NLCS", "TC", "LCS"]
+        ].rename(
+            columns={
+                "SR": "documents",
+                "NGCS": "MNGCS",
+                "NLCS": "MNLCS",
+                "LCS": "LC"
+            }
+        )
 
     elif field == "authors":
-        df['AU'] = df['AU'].fillna('').str.split(';')  # Divide gli autori
-        exploded = df.explode('AU').assign(AU=lambda x: x['AU'].str.strip())  # Espande e rimuove spazi extra
+
+        exploded = (
+            df.assign(AU=df["AU"].str.split(";"))
+            .explode("AU")
+        )
 
         NCS = (
-            exploded.groupby('AU').agg(
-                NP=('PY', 'count'),
-                MNGCS=('NGCS', 'mean'),
-                MNLCS=('NLCS', 'mean'),
-                TC=('TC', 'mean'),
-                LC=('LCS', 'mean')
+            exploded.groupby("AU")
+            .agg(
+                NP=("PY", "count"),
+                MNGCS=("NGCS", "mean"),
+                MNLCS=("NLCS", "mean"),
+                TC=("TC", "mean"),
+                LC=("LCS", "mean")
             )
             .reset_index()
-            .rename(columns={'AU': 'authors'})
+            .rename(columns={"AU": "authors"})
         )
 
-    elif field == "sources":
+    else:
+
         NCS = (
-            df.groupby('SO').agg(
-                NP=('PY', 'count'),
-                MNGCS=('NGCS', 'mean'),
-                MNLCS=('NLCS', 'mean'),
-                TC=('TC', 'mean'),
-                LC=('LCS', 'mean')
+            df.groupby("SO")
+            .agg(
+                NP=("PY", "count"),
+                MNGCS=("NGCS", "mean"),
+                MNLCS=("NLCS", "mean"),
+                TC=("TC", "mean"),
+                LC=("LCS", "mean")
             )
             .reset_index()
-            .rename(columns={'SO': 'sources'})
+            .rename(columns={"SO": "sources"})
         )
 
-    # Gestione impatto globale
     if impact_measure == "global":
-        NCS.drop(columns=['MNLCS', 'LC'], errors='ignore', inplace=True)
-    else:
-        NCS['MNLCS'] = NCS['MNLCS'].fillna(0)
+        NCS["MNLCS"] = NCS["MNGCS"]
+        NCS["LC"] = NCS["TC"]
 
     return NCS
 
@@ -517,7 +555,7 @@ def best_lab(df, tab_global, n_labels, term):
 
 def localCitations(df, fast_search=False, sep=";"):
     df = metaTagExtraction(df, "SR")
-    M = df.get() 
+    M = df
     M['TC'] = M['TC'].fillna(0)
     if fast_search:
         loccit = M['TC'].quantile(0.75)
diff --git a/www/services/etl/__init__.py b/www/services/etl/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/www/services/etl/api_retriever.py b/www/services/etl/api_retriever.py
new file mode 100644
index 000000000..e0f975e52
--- /dev/null
+++ b/www/services/etl/api_retriever.py
@@ -0,0 +1,116 @@
+import requests
+import time
+
+
+OPENALEX_URL = "https://api.openalex.org/works"
+
+
+def fetch_openalex(query, max_results=100):
+
+    results = []
+    per_page = 25
+    page = 1
+    retries = 3
+
+    while len(results) < max_results:
+
+        params = {
+            "search": query,
+            "per-page": per_page,
+            "page": page
+        }
+
+        success = False
+
+        for attempt in range(retries):
+
+            try:
+
+                response = requests.get(
+                    OPENALEX_URL,
+                    params=params,
+                    timeout=30
+                )
+
+                response.raise_for_status()
+
+                data = response.json()
+
+                works = data.get("results", [])
+
+                if not works:
+                    return results[:max_results]
+
+                results.extend(works)
+
+                success = True
+
+                break
+
+            except requests.exceptions.RequestException:
+
+                time.sleep(2)
+
+        if not success:
+            break
+
+        page += 1
+
+        time.sleep(1)
+
+    return results[:max_results]
+
+
+def openalex_to_records(results):
+
+    records = []
+
+    for work in results:
+
+        authors = [
+            a.get("author", {}).get("display_name", "")
+            for a in work.get("authorships", [])
+        ]
+
+        affiliations = []
+
+        for a in work.get("authorships", []):
+            for inst in a.get("institutions", []):
+                name = inst.get("display_name", "")
+                if name:
+                    affiliations.append(name)
+
+        keywords = [
+            k.get("display_name", "")
+            for k in work.get("keywords", [])
+        ]
+
+        concepts = [
+            c.get("display_name", "")
+            for c in work.get("concepts", [])
+        ]
+
+        record = {
+            "DB": "OPENALEX",
+            "UT": work.get("id", ""),
+            "DI": work.get("doi", ""),
+            "TI": work.get("title", ""),
+            "PY": work.get("publication_year", ""),
+            "AB": str(work.get("abstract_inverted_index", {})),
+            "TC": work.get("cited_by_count", 0),
+            "CR": work.get("referenced_works", []),
+            "SO": work.get("display_name", ""),
+            "SR": work.get("display_name", ""),
+            "AU": authors,
+            "AF": authors,
+            "DE": keywords,
+            "ID": concepts,
+            "C1": affiliations,
+            "DT": work.get("type", ""),
+            "LA": work.get("language", ""),
+            "RP": authors[0] if authors else ""
+        }
+
+        records.append(record)
+
+    return records
\ No newline at end of file
diff --git a/www/services/etl/dispatcher.py b/www/services/etl/dispatcher.py
new file mode 100644
index 000000000..e90af05b1
--- /dev/null
+++ b/www/services/etl/dispatcher.py
@@ -0,0 +1,29 @@
+"""
+Dispatcher for selecting mappings
+based on source database.
+"""
+
+from .mappings import (
+    SCOPUS_MAPPING,
+    DIMENSIONS_MAPPING,
+    PUBMED_MAPPING,
+    OPENALEX_MAPPING
+)
+
+
+def get_mapping(source: str):
+    source = source.upper()
+
+    if source == "SCOPUS":
+        return SCOPUS_MAPPING
+
+    if source == "DIMENSIONS":
+        return DIMENSIONS_MAPPING
+
+    if source == "PUBMED":
+        return PUBMED_MAPPING
+
+    if source == "OPENALEX":
+        return OPENALEX_MAPPING
+
+    raise ValueError(f"Unsupported source: {source}")
\ No newline at end of file
diff --git a/www/services/etl/mappings.py b/www/services/etl/mappings.py
new file mode 100644
index 000000000..ae01cfc1e
--- /dev/null
+++ b/www/services/etl/mappings.py
@@ -0,0 +1,52 @@
+SCOPUS_MAPPING = {
+    "Authors": "AU",
+    "Author full names": "AF",
+    "Title": "TI",
+    "Source title": "SO",
+    "Year": "PY",
+    "DOI": "DI",
+    "Abstract": "AB",
+    "Volume": "VL",
+    "Issue": "IS",
+    "Page start": "BP",
+    "Page end": "EP",
+    "Author Keywords": "DE",
+    "Index Keywords": "ID",
+    "Language of Original Document": "LA",
+    "Document Type": "DT",
+    "Affiliations": "C1",
+    "References": "CR",
+    "PubMed ID": "PMID"
+}
+
+DIMENSIONS_MAPPING = {
+    "Authors": "AU",
+    "Title": "TI",
+    "Source title": "SO",
+    "Year": "PY",
+    "DOI": "DI",
+    "Abstract": "AB"
+}
+
+PUBMED_MAPPING = {
+    "PMID": "PMID",
+    "Title": "TI",
+    "Abstract": "AB",
+    "Authors": "AU",
+    "Journal": "SO",
+    "Year": "PY",
+    "Keywords": "DE"
+}
+
+OPENALEX_MAPPING = {
+    "id": "UT",
+    "doi": "DI",
+    "title": "TI",
+    "publication_year": "PY",
+    "abstract": "AB",
+    "display_name": "SO",
+    "referenced_works": "CR",
+    "authorships": "AU",
+    "institutions": "C1",
+    "cited_by_count": "TC"
+}
\ No newline at end of file
diff --git a/www/services/etl/pipeline.py b/www/services/etl/pipeline.py
new file mode 100644
index 000000000..15103d89a
--- /dev/null
+++ b/www/services/etl/pipeline.py
@@ -0,0 +1,30 @@
+import pandas as pd
+
+from .api_retriever import (
+    fetch_openalex,
+    openalex_to_records
+)
+
+from .transformer import transform_dataframe
+from .validator import validate_dataframe
+
+
+def openalex_pipeline(query, max_results=100):
+
+    results = fetch_openalex(
+        query=query,
+        max_results=max_results
+    )
+
+    records = openalex_to_records(results)
+
+    df = pd.DataFrame(records)
+
+    df = transform_dataframe(
+        df,
+        source="openalex"
+    )
+
+    validate_dataframe(df)
+
+    return df
\ No newline at end of file
diff --git a/www/services/etl/schema.py b/www/services/etl/schema.py
new file mode 100644
index 000000000..a585cebc2
--- /dev/null
+++ b/www/services/etl/schema.py
@@ -0,0 +1,35 @@
+MANDATORY_COLUMNS = [
+    "AU",
+    "TI",
+    "SO",
+    "PY"
+]
+
+MULTI_VALUE_COLUMNS = [
+    "AU",
+    "AF",
+    "C1",
+    "CR",
+    "DE",
+    "ID"
+]
+
+REQUIRED_COLUMNS = [
+    "SR",
+    "AU",
+    "AF",
+    "TI",
+    "SO",
+    "PY",
+    "DI",
+    "DE",
+    "ID",
+    "CR",
+    "C1",
+    "AB",
+    "TC",
+    "DT",
+    "LA",
+    "DB",
+    "RP"
+]
\ No newline at end of file
diff --git a/www/services/etl/test_biblionetwork.py b/www/services/etl/test_biblionetwork.py
new file mode 100644
index 000000000..6b38e2e8a
--- /dev/null
+++ b/www/services/etl/test_biblionetwork.py
@@ -0,0 +1,17 @@
+from www.services.etl.pipeline import openalex_pipeline
+from www.services.biblionetwork import biblionetwork
+
+df = openalex_pipeline(
+    query="machine learning",
+    max_results=50
+)
+
+net = biblionetwork(
+    df,
+    analysis="co-occurrences",
+    network="keywords"
+)
+
+print(type(net))
+print(net.shape)
+print(net.head())
diff --git a/www/services/etl/test_cocitation.py b/www/services/etl/test_cocitation.py
new file mode 100644
index 000000000..1b25e55bc
--- /dev/null
+++ b/www/services/etl/test_cocitation.py
@@ -0,0 +1,17 @@
+from www.services.etl.pipeline import openalex_pipeline
+from www.services.biblionetwork import biblionetwork
+
+df = openalex_pipeline(
+    query="machine learning",
+    max_results=50
+)
+
+net = biblionetwork(
+    df,
+    analysis="co-citation",
+    network="references"
+)
+
+print(type(net))
+print(net.shape)
+print(net.head())
\ No newline at end of file
diff --git a/www/services/etl/test_collaboration.py b/www/services/etl/test_collaboration.py
new file mode 100644
index 000000000..51bf18c42
--- /dev/null
+++ b/www/services/etl/test_collaboration.py
@@ -0,0 +1,17 @@
+from www.services.etl.pipeline import openalex_pipeline
+from www.services.biblionetwork import biblionetwork
+
+df = openalex_pipeline(
+    query="machine learning",
+    max_results=50
+)
+
+net = biblionetwork(
+    df,
+    analysis="collaboration",
+    network="authors"
+)
+
+print(type(net))
+print(net.shape)
+print(net.head())
\ No newline at end of file
diff --git a/www/services/etl/test_coupling_network.py b/www/services/etl/test_coupling_network.py
new file mode 100644
index 000000000..7830db40e
--- /dev/null
+++ b/www/services/etl/test_coupling_network.py
@@ -0,0 +1,17 @@
+from www.services.etl.pipeline import openalex_pipeline
+from www.services.biblionetwork import biblionetwork
+
+df = openalex_pipeline(
+    query="machine learning",
+    max_results=50
+)
+
+net = biblionetwork(
+    df,
+    analysis="coupling",
+    network="authors"
+)
+
+print(type(net))
+print(net.shape)
+print(net.head())
\ No newline at end of file
diff --git a/www/services/etl/test_couplingmap.py b/www/services/etl/test_couplingmap.py
new file mode 100644
index 000000000..8e1eeb15d
--- /dev/null
+++ b/www/services/etl/test_couplingmap.py
@@ -0,0 +1,17 @@
+from www.services.etl.pipeline import openalex_pipeline
+from www.services.couplingmap import couplingMap
+
+df = openalex_pipeline(
+    query="machine learning",
+    max_results=50
+)
+
+result = couplingMap(
+    df,
+    analysis="documents",
+    field="CR"
+)
+
+print(result["clusters"].head())
+print(result["data"].head())
+print(result["nclust"])
\ No newline at end of file
diff --git a/www/services/etl/test_histnetwork.py b/www/services/etl/test_histnetwork.py
new file mode 100644
index 000000000..384308b5f
--- /dev/null
+++ b/www/services/etl/test_histnetwork.py
@@ -0,0 +1,13 @@
+# test_histnetwork.py
+
+from www.services.etl.pipeline import openalex_pipeline
+from www.services.histnetwork import histNetwork
+df = openalex_pipeline(
+    query="machine learning",
+    max_results=50
+)
+
+result = histNetwork(df)
+
+print(type(result))
+print(result.keys())
\ No newline at end of file
diff --git a/www/services/etl/test_pipeline.py b/www/services/etl/test_pipeline.py
new file mode 100644
index 000000000..6846f4cdd
--- /dev/null
+++ b/www/services/etl/test_pipeline.py
@@ -0,0 +1,14 @@
+from www.services.etl.pipeline import openalex_pipeline
+
+df = openalex_pipeline(
+    query="machine learning",
+    max_results=5
+)
+
+print(df.columns.tolist())
+
+print("\nID column:")
+print(df["ID"].head())
+
+print("\nDE column:")
+print(df["DE"].head())
\ No newline at end of file
diff --git a/www/services/etl/test_thematicmap.py b/www/services/etl/test_thematicmap.py
new file mode 100644
index 000000000..3648d6b93
--- /dev/null
+++ b/www/services/etl/test_thematicmap.py
@@ -0,0 +1,24 @@
+from www.services.etl.pipeline import openalex_pipeline
+from www.services.thematicmap import thematic_map
+
+df = openalex_pipeline(
+    query="machine learning",
+    max_results=50
+)
+
+result = thematic_map(
+    df,
+    field="ID"
+)
+
+print(type(result))
+print(len(result))
+
+for i, item in enumerate(result):
+    print(f"\n===== RESULT[{i}] =====")
+    print(type(item))
+
+    try:
+        print(item.head())
+    except:
+        print(item)
\ No newline at end of file
diff --git a/www/services/etl/transformer.py b/www/services/etl/transformer.py
new file mode 100644
index 000000000..461e183bb
--- /dev/null
+++ b/www/services/etl/transformer.py
@@ -0,0 +1,87 @@
+import pandas as pd
+from .dispatcher import get_mapping
+from .schema import (
+    MULTI_VALUE_COLUMNS,
+    REQUIRED_COLUMNS
+)
+
+def ensure_required_columns(df):
+
+    for col in REQUIRED_COLUMNS:
+        if col not in df.columns:
+            df[col] = ""
+
+    return df
+
+def split_multi_value(value):
+
+    if isinstance(value, list):
+        return value
+
+    if value is None:
+        return []
+
+    try:
+        if pd.isna(value):
+            return []
+    except Exception:
+        pass
+
+    if str(value).strip() == "":
+        return []
+
+    return [
+        item.strip()
+        for item in str(value).replace(",", ";").split(";")
+        if item.strip()
+    ]
+
+
+def standardize_nulls(df):
+    
+    return df.fillna("")
+
+
+def rename_columns(df, source):
+    
+    mapping = get_mapping(source)
+    
+    existing_mapping = {
+        k: v
+        for k, v in mapping.items()
+        if k in df.columns
+    }
+
+    return df.rename(columns=existing_mapping)
+
+
+def standardize_multivalue_columns(df):
+    
+    for col in MULTI_VALUE_COLUMNS:
+
+        if col not in df.columns:
+            continue
+
+        df[col] = df[col].apply(split_multi_value)
+
+    return df
+
+def ensure_required_columns(df):
+
+    for col in REQUIRED_COLUMNS:
+        if col not in df.columns:
+            df[col] = ""
+
+    return df
+
+def transform_dataframe(df, source):
+
+    df = rename_columns(df, source)
+
+    df = standardize_nulls(df)
+
+    df = ensure_required_columns(df)
+
+    df = standardize_multivalue_columns(df)
+
+    return df
\ No newline at end of file
diff --git a/www/services/etl/validator.py b/www/services/etl/validator.py
new file mode 100644
index 000000000..969e71cd7
--- /dev/null
+++ b/www/services/etl/validator.py
@@ -0,0 +1,68 @@
+from .schema import (
+    MANDATORY_COLUMNS,
+    MULTI_VALUE_COLUMNS
+)
+
+
+def validate_columns(df):
+    """
+    Ensure all required columns exist.
+    """
+
+    missing = [
+        col
+        for col in MANDATORY_COLUMNS
+        if col not in df.columns
+    ]
+
+    if missing:
+        raise ValueError(
+            f"Missing mandatory columns: {missing}"
+        )
+
+    return True
+
+
+def validate_nulls(df):
+    """
+    Ensure no null values remain.
+    """
+
+    if df.isnull().values.any():
+        raise ValueError(
+            "Dataset contains null values."
+        )
+
+    return True
+
+
+def validate_multivalue_types(df):
+    """
+    Ensure multivalue columns contain lists.
+    """
+
+    for col in MULTI_VALUE_COLUMNS:
+
+        if col not in df.columns:
+            continue
+
+        for value in df[col]:
+
+            if not isinstance(value, list):
+                raise TypeError(
+                    f"{col} contains non-list value."
+                )
+
+    return True
+
+
+def validate_dataframe(df):
+    """
+    Full validation pipeline.
+    """
+
+    validate_columns(df)
+    validate_nulls(df)
+    validate_multivalue_types(df)
+
+    return True
\ No newline at end of file
diff --git a/www/services/format_functions.py b/www/services/format_functions.py
index 1a8ee7af4..9cac97870 100644
--- a/www/services/format_functions.py
+++ b/www/services/format_functions.py
@@ -3,7 +3,9 @@
 import zipfile
 import tempfile
 import os
-
+from www.services.etl.transformer import transform_dataframe
+from www.services.etl.validator import validate_dataframe
+import pandas as pd
 
 def format_ab_column(entry, source, file_type):         # Function for AB Column (format--> "Abstract")
     abstract = ''
@@ -1632,11 +1634,21 @@ def process_single_file(data, source, file_type, author):
             entry_data.pop('AF', None)  # Remove 'AF' if it exists
         elif author == "fullname":
             entry_data.pop('AU', None)  # Remove 'AU' if it exists
+            entries.append(entry_data)
+
+    # =====================================================
+    # ETL STANDARDIZATION + VALIDATION PIPELINE
+    # =====================================================
 
-        entries.append(entry_data)
+    df = pd.DataFrame(entries)
 
-    return entries
+    try:
+        df = transform_dataframe(df, source.upper())
+        validate_dataframe(df)
+    except Exception as e:
+        print(f"ETL Validation Warning: {e}")
 
+    return df.to_dict(orient="records")
 
 def biblio_json(data, source, type, author):
     """
diff --git a/www/services/histnetwork.py b/www/services/histnetwork.py
index 7848d9744..02cdf97bf 100644
--- a/www/services/histnetwork.py
+++ b/www/services/histnetwork.py
@@ -3,24 +3,9 @@
 
 
 def histNetwork(df, min_citations=0, sep=";", network=True):
-    """
-    Create a historical network of citations from a DataFrame containing metadata of scientific papers.
-    
-    Args:
-        df (DataFrame): A DataFrame containing metadata of scientific papers.
-        min_citations (int): Minimum number of citations to include a paper in the analysis.
-        sep (str): Separator used to separate references in the citation network.
-        network (bool): If True, a citation network is created.
-    
-    Returns:
-        A dictionary containing the following keys:
-            - NetMatrix: A DataFrame containing the citation network.
-            - histData: A DataFrame containing the metadata of the papers.
-            - M: A DataFrame containing the metadata of the papers with the Local Citation Score (LCS).
-            - LCS: A list containing the Local Citation Score of each paper.
-    """
-    M = df.get()
-    db = M['DB'][0]
+
+    M = df
+    db = M['DB'].iloc[0]
 
     # Ensure required fields are present
     if 'DI' not in M:
@@ -35,9 +20,29 @@ def histNetwork(df, min_citations=0, sep=";", network=True):
     M['TC'] = M['TC'].fillna(0)
 
     if db == "Web_of_Science":
-        results = wos(M, min_citations=min_citations, sep=sep, network=network)
+        results = wos(
+            M,
+            min_citations=min_citations,
+            sep=sep,
+            network=network
+        )
+
     elif db == "Scopus":
-        results = scopus(M, min_citations=min_citations, sep=sep, network=network)
+        results = scopus(
+            M,
+            min_citations=min_citations,
+            sep=sep,
+            network=network
+        )
+
+    elif db == "OPENALEX":
+        results = wos(
+            M,
+            min_citations=min_citations,
+            sep=sep,
+            network=network
+        )
+
     else:
         print("\nDatabase not compatible with direct citation analysis\n")
         return None
@@ -76,36 +81,98 @@ def wos(M, min_citations, sep, network):
     print(f"\nAnalyzing {len(CR)} reference items...\n")
 
     CR_df = pd.DataFrame(CR)
+    if M["DB"].iloc[0] == "OPENALEX":
+
+     M["UT_CLEAN"] = (
+        M["UT"]
+        .fillna("")
+        .astype(str)
+        .str.upper()
+        .str.replace(".", "", regex=False)
+    )
+
+    CR_df["UT_CLEAN"] = (
+        CR_df["ref"]
+        .fillna("")
+        .astype(str)
+        .str.upper()
+        .str.replace(".", "", regex=False)
+    )
+
+    L = pd.merge(
+        M,
+        CR_df,
+        left_on="UT_CLEAN",
+        right_on="UT_CLEAN",
+        how="left"
+    )
 
+    
+
+    L = L[L["Paper_y"].notnull()]
+
+        
     # Add LABEL field to M and CR
-    M['LABEL'] = M['SR_FULL'].fillna('').str.upper() + " DOI " + M['DI'].fillna('').str.upper()
+    label_col = 'SR_FULL' if 'SR_FULL' in M.columns else 'SR'
+
+    M['LABEL'] = (
+    M[label_col].fillna('').astype(str).str.upper()
+    + " DOI "
+    + M['DI'].fillna('').astype(str).str.upper())
+
     M['LABEL'] = M['LABEL'].str.strip()
     CR_df['LABEL'] = CR_df['SR'].fillna('').str.upper() + " DOI " + CR_df['DI'].fillna('').str.upper()
     CR_df['LABEL'] = CR_df['LABEL'].str.strip()
 
+    
+
     # Match references with papers (left join as in R)
-    L = pd.merge(M, CR_df, on='LABEL', how='left', suffixes=('_M', '_CR'))
-    L = L[L['Paper_CR'].notnull()]
-    L['CITING'] = M.loc[L['Paper_CR'], 'LABEL'].values
-    L['nCITING'] = M.loc[L['Paper_CR'], 'nLABEL'].values
-    L['CIT_PY'] = M.loc[L['Paper_CR'], 'PY'].values
+    L = pd.merge(
+    M,
+    CR_df,
+    left_on="UT_CLEAN",
+    right_on="UT_CLEAN",
+    how="left"
+)
+
+    
+    L = L[L["Paper_y"].notnull()]
+    L["CITING"] = M.loc[L["Paper_x"].astype(int), "UT"].values
+    L["CITED"]  = M.loc[L["Paper_y"].astype(int), "UT"].values
+    L["CIT_PY"] = M.loc[L["Paper_x"].astype(int), "PY"].values
 
     # Compute Local Citation Scores (LCS)
-    LCS = L.groupby('nLABEL').size().reset_index(name='LCS')
-    M['LCS'] = M['nLABEL'].map(LCS.set_index('nLABEL')['LCS']).fillna(0).astype(int)
+    LCS = L.groupby('Paper_y').size().reset_index(name='LCS')
 
-    # Prepare histData
-    histData = M[M['TC'] >= min_citations][['LABEL', 'TI', 'DE', 'ID', 'DI', 'PY', 'LCS', 'TC']]
-    histData.columns = ['Paper', 'Title', 'Author_Keywords', 'KeywordsPlus', 'DOI', 'Year', 'LCS', 'GCS']
+    M['LCS'] = (
+    M.index.to_series()
+    .map(LCS.set_index('Paper_y')['LCS'])
+    .fillna(0)
+    .astype(int))
 
+    # Prepare histData
+    histData = M[M['TC'] >= min_citations][
+    ['UT', 'TI', 'DE', 'ID', 'DI', 'PY', 'LCS', 'TC']
+    ].copy()
+
+    histData.columns = [
+    'Paper',
+    'Title',
+    'Author_Keywords',
+    'KeywordsPlus',
+    'DOI',
+    'Year',
+    'LCS',
+    'GCS'
+]
     WLCR = None
     if network:
         # Build citation network
-        CITING = L.groupby('CITING').agg(
-            LCR=('LABEL', lambda x: ';'.join(x.dropna())),
-            PY=('CIT_PY', 'first'),
-            Paper=('Paper_CR', 'first')
-        ).reset_index().sort_values(by='PY')
+        CITING = L.groupby("CITING").agg(
+        LCR=("CITED", lambda x: ";".join(x.astype(str))),
+        PY=("CIT_PY", "first"),
+        Paper=("Paper_x", "first")
+    ).reset_index().sort_values(by="PY")
 
         # Assign LCR to the correct Paper index (Paper is 0-based)
         M['LCR'] = ""
@@ -127,10 +194,17 @@ def wos(M, min_citations, sep, network):
         M.index = M['LABEL'].str.strip()
 
         M['LCR'] = M['LCR'].fillna('')
-
+    
         # Ensure all papers are included as both rows and columns
-        WLCR = cocMatrix(reactive.Value(M), Field="LCR", sep=sep)
+        WLCR = cocMatrix(M, Field="LCR", sep=sep)
         
+        if WLCR is None:
+          print("No Local Citation Relationships found.")
+          return {
+           "histData": pd.DataFrame(),
+           "M": M,
+           "LCS": []
+           }
         # Trova le LABEL mancanti
         missing_LABEL = set(M.index) - set(WLCR.columns)
         
diff --git a/www/services/metatagextraction.py b/www/services/metatagextraction.py
index 5e1f8b9c8..bdbeb2311 100644
--- a/www/services/metatagextraction.py
+++ b/www/services/metatagextraction.py
@@ -2,19 +2,8 @@
 
 
 def metaTagExtraction(df, Field="AU_CO", sep=";", aff_disamb=False):
-    """
-    Extract metadata tags from a DataFrame based on the specified field.
-    
-    Args:
-        df: A DataFrame object containing the data.
-        Field: The field to extract metadata tags from.
-        sep: The separator used to split the metadata tags.
-        aff_disamb: A boolean value indicating whether to disambiguate the affiliations.
-    
-    Returns:
-        A DataFrame with the extracted metadata tags.
-    """
-    M = df.get()
+ 
+    M = df
 
     if Field == "SR":
         M = SR(M)
@@ -41,34 +30,58 @@ def metaTagExtraction(df, Field="AU_CO", sep=";", aff_disamb=False):
             a = ind[ind > -1].index
             M.loc[a, "AU1_UN"] = M.loc[a, "AU1_UN"].str[ind[a] + 2:]
 
-    df.set(M)
+    df = M
     
     return df
 
 
 def SR(M):
     listAU = M["AU"].apply(lambda l: [x.strip() for x in l])
+
     if M["DB"].iloc[0].lower() == "scopus":
-        listAU = listAU.apply(lambda l: [x.replace(" ", ",").replace(",,", ",").replace(" ", "") for x in l])
-    FirstAuthors = listAU.apply(lambda l: l[0] if len(l) > 0 else "NA").str.replace(",", " ")
+        listAU = listAU.apply(
+            lambda l: [
+                x.replace(" ", ",")
+                 .replace(",,", ",")
+                 .replace(" ", "")
+                for x in l
+            ]
+        )
+
+    FirstAuthors = listAU.apply(
+        lambda l: l[0] if len(l) > 0 else "NA"
+    ).str.replace(",", " ")
+
+    # OpenAlex compatibility
+    if "JI" not in M.columns:
+        if "SO" in M.columns:
+            M["JI"] = M["SO"]
+        else:
+            M["JI"] = ""
 
     no_art = M["JI"] == ""
     M.loc[no_art, "JI"] = M.loc[no_art, "SO"]
+
     J9 = M["JI"].str.replace(".", " ", regex=False).str.strip()
+
     SR = FirstAuthors + ", " + M["PY"].astype(str) + ", " + J9
 
     M["SR_FULL"] = SR.str.replace(r"\s+", " ", regex=True)
 
-    st = i = 0
+    st = 0
+    i = 0
+
     while st == 0:
         ind = SR.duplicated()
+
         if ind.any():
             i += 1
             SR[ind] = SR[ind] + "-" + chr(96 + i)
         else:
             st = 1
+
     M["SR"] = SR.str.replace(r"\s+", " ", regex=True)
-    
+
     return M
 
 
diff --git a/www/services/thematicmap.py b/www/services/thematicmap.py
index 3c313b7f6..55194d952 100644
--- a/www/services/thematicmap.py
+++ b/www/services/thematicmap.py
@@ -7,13 +7,13 @@
 def thematic_map(df, field="ID", n=250, minfreq=5, ngrams=1, stemming=False, size=0.5, n_labels=1, community_repulsion=0.1, repel=True, remove_terms=None, synonyms=None, cluster="walktrap", subgraphs=False):
         # df = metaTagExtraction(df, field=field)
         M = df
-        m = df.get()
+    
 
         # Set ngrams based on field
         ngrams = int(ngrams) if field in ['TI', 'AB'] else 1
         # Set stemming as boolean
         stemming = True if stemming == "Yes" else False
-        minfreq = max(0, int(minfreq * len(m) // 1000))
+        minfreq = max(0, int(minfreq * len(M) // 1000))
 
         # Preprocess field and create network matrix
         if field == "ID":
@@ -314,8 +314,7 @@ def thematic_map(df, field="ID", n=250, minfreq=5, ngrams=1, stemming=False, siz
             )
         )
         fig = go.FigureWidget(fig)
-        fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
-                                     'displaylogo': False}
+        fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],'displaylogo': False}
 
         ##############################################################################################################################################
 
@@ -335,7 +334,7 @@ def thematic_map(df, field="ID", n=250, minfreq=5, ngrams=1, stemming=False, siz
         df = df[['Cluster', 'CallonCentrality', 'CallonDensity', 'RankCentrality', 'RankDensity', 'ClusterFrequency']]
 
         # Handle document clustering
-        document_to_clusters = cluster_assignment(M=m, words=df_lab, field=field, remove_terms=remove_terms, synonyms=synonyms, threshold=0.5)
+        document_to_clusters = cluster_assignment(M=M, words=df_lab, field=field, remove_terms=remove_terms, synonyms=synonyms, threshold=0.5)
 
         # Create parameters dictionary and unpack into dataframe
         params = {
@@ -660,14 +659,16 @@ def cluster_assignment(M, words, field, remove_terms=None, synonyms=None, thresh
     year = pd.Timestamp.now().year + 1
     
     M = M.reset_index(drop=True)
-    terms = (M.assign(
+    terms = (
+    M.assign(
         TCpY=lambda x: x['TC']/(year-x['PY']),
         NTC=lambda x: x.groupby('PY')['TC'].transform(lambda y: y/y.mean())
-    )[['DI', 'AU', 'TI', 'SO', 'PY', 'TC', 'TCpY', 'NTC', 'SR']]
-        .merge(terms, on='SR')
-        .fillna(0)
-        .groupby('Assigned_cluster')
-        .apply(lambda x: x.sort_values('TC', ascending=False))
-        .reset_index(drop=True))
-
+    )[['DI','AU','TI','SO','PY','TC','TCpY','NTC','SR']]
+    .merge(terms, on='SR')
+    .fillna(0)
+    .infer_objects(copy=False)
+    .groupby('Assigned_cluster')
+    .apply(lambda x: x.sort_values('TC', ascending=False))
+    .reset_index(drop=True))
+    
     return terms