Skip to content

Issues in DCTE model training #105

@williamlhy

Description

@williamlhy

When I tried to train the DCTE model (using a locally stored dataset and a locally stored sentence-transformer model), I ran the following code:

from stream_topic.models import DCTE
from stream_topic.utils import TMDataset
dataset = TMDataset()
dataset.fetch_dataset(name="BBC_News",dataset_path = "/hongyi/STREAM/stream_topic/stream_topic_data/preprocessed_datasets/BBC_News",source = 'local')
dataset.preprocess(model_type="DCTE")
model = DCTE(model="/hongyi/stream/sentence-transformers/paraphrase-MiniLM-L3-v2")
model.fit(dataset)
topics = model.get_topics()

After the "Training DCTE topic model" step completed, I got the following error message:

Loading best SentenceTransformer model from step 11868.
***** Running evaluation *****
2024-12-19 15:35:54.028 | ERROR    | stream_topic.models.DCTE:fit:223 - Error in training: Couldn't find a module script at /hongyi/STREAM/accuracy/accuracy.py. Module 'accuracy' doesn't exist on the Hugging Face Hub either.
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[4], line 3
      1 from stream_topic.models import KmeansTM,CEDC, ETM,DCTE,LDA,ProdLDA,NSTM,CTM,CTMNeg,CBC,BERTopicTM,TNTM,SOMTM,WordCluTM
      2 model = DCTE(model="/hongyi/stream/sentence-transformers/paraphrase-MiniLM-L3-v2")#word_embedding_model_name=
----> 3 model.fit(dataset)#
      5 topics = model.get_topics()
      6 print(topics)

File ~/STREAM/stream_topic/models/DCTE.py:220, in DCTE.fit(self, dataset, val_split, **training_args)
    218     self.trainer.train()
    219     # evaluate accuracy
--> 220     metrics = self.trainer.evaluate()
    222 except Exception as e:
    223     logger.error(f"Error in training: {e}")

File ~/anaconda3/envs/mystream/lib/python3.10/site-packages/setfit/trainer.py:844, in Trainer.evaluate(self, dataset, metric_key_prefix)
    842 if isinstance(self.metric, str):
    843     metric_config = "multilabel" if self.model.multi_target_strategy is not None else None
--> 844     metric_fn = evaluate.load(self.metric, config_name=metric_config)
    846     results = metric_fn.compute(predictions=y_pred, references=y_test, **metric_kwargs)
    848 elif callable(self.metric):

File ~/anaconda3/envs/mystream/lib/python3.10/site-packages/evaluate/loading.py:748, in load(path, config_name, module_type, process_id, num_process, cache_dir, experiment_id, keep_in_memory, download_config, download_mode, revision, **init_kwargs)
    703 """Load a [`~evaluate.EvaluationModule`].
    704 
    705 Args:
   (...)
    745     ```
    746 """
    747 download_mode = DownloadMode(download_mode or DownloadMode.REUSE_DATASET_IF_EXISTS)
--> 748 evaluation_module = evaluation_module_factory(
    749     path, module_type=module_type, revision=revision, download_config=download_config, download_mode=download_mode
    750 )
    751 evaluation_cls = import_main_class(evaluation_module.module_path)
    752 evaluation_instance = evaluation_cls(
    753     config_name=config_name,
    754     process_id=process_id,
   (...)
    760     **init_kwargs,
    761 )

File ~/anaconda3/envs/mystream/lib/python3.10/site-packages/evaluate/loading.py:681, in evaluation_module_factory(path, module_type, revision, download_config, download_mode, force_local_path, dynamic_modules_path, **download_kwargs)
    679         if not isinstance(e1, (ConnectionError, FileNotFoundError)):
    680             raise e1 from None
--> 681         raise FileNotFoundError(
    682             f"Couldn't find a module script at {relative_to_absolute_path(combined_path)}. "
    683             f"Module '{path}' doesn't exist on the Hugging Face Hub either."
    684         ) from None
    685 else:
    686     raise FileNotFoundError(f"Couldn't find a module script at {relative_to_absolute_path(combined_path)}.")

FileNotFoundError: Couldn't find a module script at /hongyi/STREAM/accuracy/accuracy.py. Module 'accuracy' doesn't exist on the Hugging Face Hub either.

@AnFreTh Could you take a look at this issue?

Metadata

Metadata

Assignees

Labels

No labels
No labels

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions