ingestion
- class virtualitics_sdk.nlp.ingestion.DataDiagnostic(data_upload_step, pipeline_config_step, data_filtering_step, advanced_dash=False, downsample_data=False, num_samples=10000, sample_percentage=0.5)¶
Bases: Step
- build_advanced_dashboard(nlp_stats_df, store_interface, nlp_module, current_page)¶
- build_simple_dashboard(nlp_stats_df, store_interface, nlp_module, current_page)¶
- default_narrative = 'joint_narrative_f'¶
- static filter_by_date(pandas_df, dt_col, range_vals)¶
- get_dependencies(store_interface)¶
- logger = <Logger DataDiagnostic (INFO)>¶
- main_section = 'Data Diagnostic'¶
- merge_narrative_features(input_df, cols, drop=False)¶
- run(flow_metadata)¶
- class virtualitics_sdk.nlp.ingestion.DateTimeFiltering(pipeline_config_step, data_upload_step)¶
Bases: Step
- static cast_column(df_column, date_format)¶
- Return type: Series
- dt_range = 'Document Time Window'¶
- get_dependencies(store_interface)¶
- logger = <Logger DateTimeFiltering (INFO)>¶
- static produce_plot_image(x, y, img_title='', img_descr='')¶
- run(flow_metadata)¶
- virtualitics_sdk.nlp.ingestion.create_average_dep_depth_hist(nlp_stats_df)¶
- virtualitics_sdk.nlp.ingestion.doc_len_infographic(nlp_stats)¶
- Return type: Tuple[InfographData, InfographData]
- virtualitics_sdk.nlp.ingestion.empty_docs_infographic(nlp_stats)¶
- Return type:
- virtualitics_sdk.nlp.ingestion.events_entities_infographics(nlp_stats)¶
- Return type: List[InfographData]
- virtualitics_sdk.nlp.ingestion.generate_pos_summary_df(nlp_stats_df)¶
- virtualitics_sdk.nlp.ingestion.mean_dependency_tree_depth(nlp_stats)¶
- Return type:
- virtualitics_sdk.nlp.ingestion.mean_sents_info(nlp_stats, threshold=2.5)¶
- Return type:
- virtualitics_sdk.nlp.ingestion.mean_unique_words(nlp_stats)¶
- Return type:
- virtualitics_sdk.nlp.ingestion.sent_len_infographic(nlp_stats)¶
- Return type: Tuple[InfographData, InfographData]
- virtualitics_sdk.nlp.ingestion.unique_ents_features_in_corpus(ents_table)¶
- virtualitics_sdk.nlp.ingestion.unique_events_features_in_corpus(events_table)¶