MIKA
The MIKA package contains three core components for analyzing large sets of narrative text: mika.kd for knowledge discovery, mika.ir for information retrieval, and mika.utils for utilities.
Typically, a user will load and configure their data using mika.utils and then use a combination of functions from mika.kd and mika.ir to perform an analysis.
- utils package
- kd package
- mika.kd.FMEA
- mika.kd.Topic_Model_plus
Topic_Model_plus
Topic_Model_plus.bert_topic()
Topic_Model_plus.calc_bert_coherence()
Topic_Model_plus.coherence_scores()
Topic_Model_plus.get_bert_coherence()
Topic_Model_plus.get_bert_topic_diversity()
Topic_Model_plus.get_bert_topics_from_probs()
Topic_Model_plus.hdp()
Topic_Model_plus.hlda()
Topic_Model_plus.hlda_display()
Topic_Model_plus.hlda_extract_models()
Topic_Model_plus.hlda_visual()
Topic_Model_plus.label_hlda_topics()
Topic_Model_plus.label_lda_topics()
Topic_Model_plus.lda()
Topic_Model_plus.lda_extract_models()
Topic_Model_plus.lda_visual()
Topic_Model_plus.load_bert_model()
Topic_Model_plus.reduce_bert_topics()
Topic_Model_plus.save_bert_coherence()
Topic_Model_plus.save_bert_document_topic_distribution()
Topic_Model_plus.save_bert_model()
Topic_Model_plus.save_bert_results()
Topic_Model_plus.save_bert_taxonomy()
Topic_Model_plus.save_bert_topic_diversity()
Topic_Model_plus.save_bert_topics()
Topic_Model_plus.save_bert_topics_from_probs()
Topic_Model_plus.save_bert_vis()
Topic_Model_plus.save_hlda_coherence()
Topic_Model_plus.save_hlda_document_topic_distribution()
Topic_Model_plus.save_hlda_level_n_taxonomy()
Topic_Model_plus.save_hlda_models()
Topic_Model_plus.save_hlda_results()
Topic_Model_plus.save_hlda_taxonomy()
Topic_Model_plus.save_hlda_topics()
Topic_Model_plus.save_lda_coherence()
Topic_Model_plus.save_lda_document_topic_distribution()
Topic_Model_plus.save_lda_models()
Topic_Model_plus.save_lda_results()
Topic_Model_plus.save_lda_taxonomy()
Topic_Model_plus.save_lda_topics()
Topic_Model_plus.save_mixed_taxonomy()
- mika.kd.NER
align_labels_with_tokens()
build_confusion_matrix()
check_doc_to_sentence_split()
clean_annots_from_str()
clean_doccano_annots()
clean_text_tags()
compute_classification_report()
compute_metrics()
get_cleaned_label()
identify_bad_annotations()
plot_eval_metrics()
plot_eval_results()
plot_loss()
read_doccano_annots()
read_trainer_logs()
split_docs_to_sentences()
tokenize()
tokenize_and_align_labels()
- mika.kd.trend_analysis
add_hazards_to_docs()
bootstrap_metric()
build_word_clouds()
calc_CI()
calc_classification_metrics()
calc_rate()
calc_severity_per_hazard()
check_for_hazard_words()
check_for_negation_words()
chi_squared_tests()
corr_sig()
create_correlation_matrix()
examine_hazard_extraction_mismatches()
get_doc_text()
get_doc_time()
get_hazard_df()
get_hazard_doc_ids()
get_hazard_info()
get_hazard_topics()
get_hazard_topics_per_doc()
get_hazard_words()
get_likelihood_FAA()
get_likelihood_USFS()
get_negation_words()
get_results_info()
get_topics_per_doc()
get_word_frequencies()
hazard_accuracy()
identify_docs_per_fmea_row()
identify_docs_per_hazard()
make_pie_chart()
minmax_scale()
multiple_reg_feature_importance()
plot_USFS_risk_matrix()
plot_frequency_time_series()
plot_metric_averages()
plot_metric_time_series()
plot_predictors()
plot_risk_matrix()
proposed_topics()
record_hazard_doc_info()
regression_feature_importance()
remove_outliers()
reshape_correlation_matrix()
sample_for_accuracy()
set_up_docs_per_hazard_vars()
- ir package