test_TextClassifier_integration.py

Toggle Theme

      from sklearn.metrics import accuracy_score


def test_end_to_end_pipeline_integration(trained_classifier):
    """
    Integration test for the full text classification pipeline.
    Tests data preprocessing, vectorization, and prediction working together.
    Instead of exact prediction match, checks if accuracy is above a threshold.
    """
    # Simulate new raw input data
    new_raw_texts = [
        "This movie was truly fantastic!",
        "I absolutely hated the ending.",
        "It was okay, nothing special.",
        "A brilliant and engaging story.",
        "The best product I've ever used.",
        "Completely awful, do not recommend.",
    ]

    # True labels for the integration test data to calculate accuracy
    true_integration_labels = [
        "positive",
        "negative",
        "positive",
        "positive",
        "positive",
        "negative",
    ]

    # Run the full prediction pipeline to get actual predictions
    actual_predictions = trained_classifier.predict(new_raw_texts)

    # Calculate accuracy for the integration test
    integration_accuracy = accuracy_score(true_integration_labels, actual_predictions)

    # Set a threshold for acceptable integration performance
    expected_min_integration_accuracy = (
        0.65  # Adjust based on expected model performance
    )

    assert (
        integration_accuracy >= expected_min_integration_accuracy
    ), f"Integration test accuracy dropped to {integration_accuracy:.2f}, expected >= {expected_min_integration_accuracy:.2f}"