NLP-based system for analyzing literature from Project Gutenberg using topic modeling, similarity analysis, clustering, and more.
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt

├── lib/
│   ├── __init__.py
│   ├── card.py
│   ├── entities.py
│   ├── lexdiv.py
│   ├── scrapper.py
│   ├── similar.py
│   ├── summarize.py
│   ├── tools_nlp.py
│   └── topics_f.py
├── books/
├── venv/
├── .gitignore
├── banner.png
├── bookworm.py
├── README.md
├── requirements.txt
python bookworm.py --info <book_id> # Get book metadata
python bookworm.py --download <book_id> # Download book from Project Gutenberg

python bookworm.py --topics <book_id> # Extract top topics per section
python bookworm.py --topics_lda <book_id> # Topic modeling with LDA
python bookworm.py --topics_lsa <book_id> # Topic modeling with LSA
python bookworm.py --similar <book_id> # Find similar books
python bookworm.py --entities <book_id> # Extract named entities
python bookworm.py --summarize <book_id> # Summarize a book
python bookworm.py --card <book_id> # Book info card
python bookworm.py --lexdiv <book_id> # Lexical diversity metrics

python bookworm.py --download 11
python bookworm.py --similar 11
python bookworm.py --topics 11