Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "taskgen-ai"
version = "3.4.0"
version = "3.4.1"
authors = [
{ name="John Tan Chong Min", email="tanchongmin@gmail.com" },
]
Expand All @@ -18,7 +18,7 @@ classifiers = [
]
dependencies = ["openai>=1.59.6",
"langchain", "dill>=0.3.9", "termcolor>=3.1.0", "requests",
"pypdf~=6.0.0", "python-docx", "pandas", "xlrd",
"python-docx", "pandas", "xlrd",
"asyncio", "opentelemetry-sdk~=1.32.1"]

[project.urls]
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ langchain
dill>=0.3.9
termcolor>=3.1.0
requests
pypdf~=6.0.0
python-docx
pandas
xlrd
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@

setup(
name="taskgen",
version="3.4.0",
version="3.4.1",
packages=find_packages(),
install_requires=[
"openai>=1.59.6",
"langchain",
"dill>=0.3.9",
"termcolor>=3.1.0",
"requests",
"pypdf~=6.0.0",
"python-docx",
"pandas",
"xlrd",
Expand Down
29 changes: 14 additions & 15 deletions taskgen/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import time
from typing import Any
import pypdf
from docx import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
# import chromadb
Expand Down Expand Up @@ -58,8 +57,8 @@ def read_file(self, filepath, text_splitter=None):
text = pd.read_csv(filepath).to_string()
elif ".docx" in filepath:
text = self.read_docx(filepath)
elif ".pdf" in filepath:
text = self.read_pdf(filepath)
# elif ".pdf" in filepath:
# text = self.read_pdf(filepath)
else:
raise ValueError(
"File type not spported, supported file types: pdf, docx, csv, xls"
Expand All @@ -78,18 +77,18 @@ def read_file(self, filepath, text_splitter=None):
memories = [{"content": text, "filepath": filepath} for text in texts]
return memories

def read_pdf(self, filepath):
    """Return the text of the PDF at ``filepath``, one page per line group.

    The file is opened in binary mode and handed to ``pypdf.PdfReader``.
    Text is extracted page by page; pages that yield no text are left out
    of the result and a notice is printed for each of them. The remaining
    page texts are joined with a single newline between pages.
    """
    extracted_pages = []
    with open(filepath, "rb") as pdf_handle:
        for pdf_page in pypdf.PdfReader(pdf_handle).pages:
            content = pdf_page.extract_text()
            if not content:
                # Nothing extractable on this page: report and skip it.
                print("No text found on page")
                continue
            extracted_pages.append(content)
    return "\n".join(extracted_pages)
# def read_pdf(self, filepath):
# # Open the PDF file
# text_list = []
# with open(filepath, "rb") as file:
# pdf_reader = pypdf.PdfReader(file)
# for page in pdf_reader.pages:
# page_text = page.extract_text()
# if page_text: # Ensure there's text on the page
# text_list.append(page_text)
# else:
# print("No text found on page")
# return "\n".join(text_list)

def read_docx(self, filepath):
doc = Document(filepath)
Expand Down