simbianai · mishaxmishra · Oct 23, 2025 · Oct 23, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "taskgen-ai"
-version = "3.4.0"
+version = "3.4.1"
 authors = [
   { name="John Tan Chong Min", email="tanchongmin@gmail.com" },
 ]
@@ -18,7 +18,7 @@ classifiers = [
 ]
 dependencies = ["openai>=1.59.6",
 "langchain", "dill>=0.3.9", "termcolor>=3.1.0", "requests",
-"pypdf~=6.0.0", "python-docx", "pandas", "xlrd",
+"python-docx", "pandas", "xlrd",
 "asyncio", "opentelemetry-sdk~=1.32.1"]
 
 [project.urls]

diff --git a/requirements.txt b/requirements.txt
@@ -3,7 +3,6 @@ langchain
 dill>=0.3.9
 termcolor>=3.1.0
 requests
-pypdf~=6.0.0
 python-docx
 pandas
 xlrd

diff --git a/setup.py b/setup.py
@@ -2,15 +2,14 @@
 
 setup(
     name="taskgen",
-    version="3.4.0",
+    version="3.4.1",
     packages=find_packages(),
     install_requires=[
         "openai>=1.59.6",
         "langchain",
         "dill>=0.3.9",
         "termcolor>=3.1.0",
         "requests",
-        "pypdf~=6.0.0",
         "python-docx",
         "pandas",
         "xlrd",

diff --git a/taskgen/memory.py b/taskgen/memory.py
@@ -4,7 +4,6 @@
 import os
 import time
 from typing import Any
-import pypdf
 from docx import Document
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 # import chromadb
@@ -58,8 +57,8 @@ def read_file(self, filepath, text_splitter=None):
             text = pd.read_csv(filepath).to_string()
         elif ".docx" in filepath:
             text = self.read_docx(filepath)
-        elif ".pdf" in filepath:
-            text = self.read_pdf(filepath)
+        # elif ".pdf" in filepath:  # PDF reading disabled: pypdf dependency removed
+        #     text = self.read_pdf(filepath)
         else:
             raise ValueError(
                 "File type not spported, supported file types: pdf, docx, csv, xls"
@@ -78,18 +77,18 @@ def read_file(self, filepath, text_splitter=None):
         memories = [{"content": text, "filepath": filepath} for text in texts]
         return memories
 
-    def read_pdf(self, filepath):
-        # Open the PDF file
-        text_list = []
-        with open(filepath, "rb") as file:
-            pdf_reader = pypdf.PdfReader(file)
-            for page in pdf_reader.pages:
-                page_text = page.extract_text()
-                if page_text:  # Ensure there's text on the page
-                    text_list.append(page_text)
-                else:
-                    print("No text found on page")
-        return "\n".join(text_list)
+    # def read_pdf(self, filepath):  # disabled: pypdf dependency removed in 3.4.1
+    #     # Open the PDF file
+    #     text_list = []
+    #     with open(filepath, "rb") as file:
+    #         pdf_reader = pypdf.PdfReader(file)
+    #         for page in pdf_reader.pages:
+    #             page_text = page.extract_text()
+    #             if page_text:  # Ensure there's text on the page
+    #                 text_list.append(page_text)
+    #             else:
+    #                 print("No text found on page")
+    #     return "\n".join(text_list)
 
     def read_docx(self, filepath):
         doc = Document(filepath)