Source code for mdvtools.tests.example_mock_usage

#!/usr/bin/env python3
"""
Example usage of the mock AnnData module.

This script demonstrates how to use the MockAnnDataFactory and related utilities
for creating test data and stress testing the MDV conversion pipeline.
"""

import os
import tempfile
import shutil
from contextlib import contextmanager

from mdvtools.conversions import convert_scanpy_to_mdv
from mdvtools.mdvproject import MDVProject
from .mock_anndata import (
    MockAnnDataFactory,
    create_minimal_anndata,
    create_realistic_anndata,
    create_large_anndata,
    create_edge_case_anndata,
    suppress_anndata_warnings,
    get_anndata_summary,
    validate_anndata
)


@contextmanager
def temp_mdv_project():
    """Context manager for temporary MDV project creation and cleanup.

    A fresh directory is created with ``tempfile.mkdtemp`` and its path is
    handed to the ``with`` body. The directory tree is removed when the body
    finishes, whether it returned normally or raised.

    Yields:
        str: Path of the newly created temporary directory.
    """
    test_dir = tempfile.mkdtemp()
    try:
        yield test_dir
    finally:
        # EAFP: the body may already have removed the directory. Attempting
        # the removal directly avoids the check-then-delete race of calling
        # ``os.path.exists`` before ``shutil.rmtree``.
        try:
            shutil.rmtree(test_dir)
        except FileNotFoundError:
            pass
def example_basic_usage():
    """Demonstrate the module-level convenience constructors.

    Builds a minimal, a realistic, a large and an edge-case AnnData object
    in turn and prints a short description of each to stdout.
    """

    def _report_shape(adata):
        # Shared "cells x genes" line printed after every construction step.
        print(f" Created: {adata.n_obs} cells x {adata.n_vars} genes")

    print("=== Basic Usage Examples ===")

    # Step 1: the smallest dataset, intended for quick tests.
    print("\n1. Creating minimal AnnData...")
    minimal = create_minimal_anndata(n_cells=20, n_genes=10)
    _report_shape(minimal)

    # Step 2: a dataset carrying the usual embeddings and layers.
    print("\n2. Creating realistic AnnData...")
    realistic = create_realistic_anndata(n_cells=500, n_genes=1000)
    _report_shape(realistic)
    print(f" Has PCA: {'X_pca' in realistic.obsm}")
    print(f" Has UMAP: {'X_umap' in realistic.obsm}")
    print(f" Has layers: {list(realistic.layers.keys())}")

    # Step 3: a bigger dataset for stress testing; a ``toarray`` attribute
    # on ``X`` is used as the sparse-matrix indicator.
    print("\n3. Creating large AnnData...")
    large = create_large_anndata(n_cells=2000, n_genes=3000)
    _report_shape(large)
    print(f" Is sparse: {hasattr(large.X, 'toarray')}")

    # Step 4: a dataset built around edge cases.
    print("\n4. Creating edge case AnnData...")
    edge = create_edge_case_anndata()
    _report_shape(edge)
    print(f" Edge case columns: {list(edge.obs.columns)}")
def example_factory_usage():
    """Demonstrate direct use of ``MockAnnDataFactory``.

    Creates a factory with a fixed seed, builds one dataset with custom
    cell types and conditions, then builds realistic datasets at several
    sizes and prints a one-line summary of each.
    """
    print("\n=== Factory Usage Examples ===")

    # A fixed seed keeps the generated data reproducible between runs.
    mock_factory = MockAnnDataFactory(random_seed=42)

    # Part 1: a dataset with caller-chosen categories.
    print("\n1. Creating AnnData with custom cell types...")
    cell_type_names = ['Neuron', 'Astrocyte', 'Oligodendrocyte', 'Microglia']
    condition_names = ['Control', 'Disease', 'Treatment']
    tailored = mock_factory.create_with_specific_features(
        cell_types=cell_type_names,
        conditions=condition_names,
        n_cells=100,
        n_genes=200,
        add_missing=True,
    )
    obs_table = tailored.obs
    print(f" Cell types: {list(obs_table['cell_type'].cat.categories)}")
    print(f" Conditions: {list(obs_table['condition'].cat.categories)}")

    # Part 2: the same realistic recipe at increasing sizes.
    print("\n2. Creating datasets of different sizes..."
          )
    for cell_count, gene_count in ((50, 25), (200, 100), (1000, 500)):
        generated = mock_factory.create_realistic(cell_count, gene_count)
        info = get_anndata_summary(generated)
        print(f" {cell_count}x{gene_count}: {info['n_cells']} cells, {info['n_genes']} genes")
def example_conversion_testing():
    """Demonstrate converting mock datasets into MDV projects.

    Three datasets (minimal, realistic, edge cases) are each converted with
    ``convert_scanpy_to_mdv`` into a temporary project directory, and a few
    facts about the resulting project are printed.
    """
    print("\n=== Conversion Testing Examples ===")
    mock_factory = MockAnnDataFactory(random_seed=42)

    # Case 1: minimal data.
    print("\n1. Testing minimal data conversion...")
    minimal = mock_factory.create_minimal(50, 25)
    with temp_mdv_project() as project_dir, suppress_anndata_warnings():
        project = convert_scanpy_to_mdv(project_dir, minimal, delete_existing=True)
        print(f" Conversion successful: {isinstance(project, MDVProject)}")
        print(f" Datasources: {project.get_datasource_names()}")

    # Case 2: realistic data; report how many columns each datasource has.
    print("\n2. Testing realistic data conversion...")
    realistic = mock_factory.create_realistic(200, 100)
    with temp_mdv_project() as project_dir, suppress_anndata_warnings():
        project = convert_scanpy_to_mdv(project_dir, realistic, delete_existing=True)
        cells_info = project.get_datasource_metadata("cells")
        genes_info = project.get_datasource_metadata("genes")
        print(f" Cells columns: {len(cells_info['columns'])}")
        print(f" Genes columns: {len(genes_info['columns'])}")

    # Case 3: edge-case data.
    print("\n3. Testing edge case conversion...")
    edge = mock_factory.create_edge_cases()
    with temp_mdv_project() as project_dir, suppress_anndata_warnings():
        project = convert_scanpy_to_mdv(project_dir, edge, delete_existing=True)
        print(f" Edge case conversion successful: {isinstance(project, MDVProject)}")
def example_validation_and_summary():
    """Demonstrate ``validate_anndata`` and ``get_anndata_summary``.

    Four datasets are built up front with a seeded factory; each is then
    validated and summarised, and the results are printed.
    """
    print("\n=== Validation and Summary Examples ===")
    mock_factory = MockAnnDataFactory(random_seed=42)

    # All datasets are constructed before any of them is reported on.
    labelled_datasets = (
        ("minimal", mock_factory.create_minimal(20, 10)),
        ("realistic", mock_factory.create_realistic(100, 50)),
        ("large", mock_factory.create_large(500, 200)),
        ("edge_cases", mock_factory.create_edge_cases()),
    )

    for label, dataset in labelled_datasets:
        print(f"\n{label.capitalize()} dataset:")

        # Structural validation comes first.
        valid_flag = validate_anndata(dataset)
        print(f" Valid: {valid_flag}")

        # Then the summary dictionary, reported one entry per line.
        info = get_anndata_summary(dataset)
        print(f" Size: {info['n_cells']} cells x {info['n_genes']} genes")
        print(f" Categorical obs: {len(info['categorical_obs'])}")
        print(f" Categorical var: {len(info['categorical_var'])}")
        print(f" Has missing obs: {info['has_missing_obs']}")
        print(f" Has missing var: {info['has_missing_var']}")
        print(f" Is sparse: {info['sparse']}")
def example_stress_testing():
    """Demonstrate conversion of progressively larger mock datasets.

    For each configured size a realistic dataset is generated, converted
    into a temporary MDV project, and the column counts of the "cells" and
    "genes" datasources are printed.
    """
    print("\n=== Stress Testing Examples ===")
    mock_factory = MockAnnDataFactory(random_seed=42)

    # (cells, genes, label) triples, ordered from smallest to largest.
    size_plan = (
        (100, 50, "small"),
        (500, 200, "medium"),
        (1000, 500, "large"),
        (2000, 1000, "very large"),
    )

    for cell_count, gene_count, label in size_plan:
        print(f"\nTesting {label} dataset ({cell_count} cells x {gene_count} genes)...")

        dataset = mock_factory.create_realistic(cell_count, gene_count)

        with temp_mdv_project() as project_dir, suppress_anndata_warnings():
            project = convert_scanpy_to_mdv(project_dir, dataset, delete_existing=True)

            # Read both metadata records before reporting anything.
            cells_info = project.get_datasource_metadata("cells")
            genes_info = project.get_datasource_metadata("genes")
            print(" Conversion successful")
            print(f" Cells columns: {len(cells_info['columns'])}")
            print(f" Genes columns: {len(genes_info['columns'])}")
def main():
    """Run all examples in order.

    Any exception raised by an example is reported on stdout and then
    re-raised unchanged.
    """
    rule = "=" * 50
    print("Mock AnnData Module Usage Examples")
    print(rule)

    try:
        for run_example in (
            example_basic_usage,
            example_factory_usage,
            example_conversion_testing,
            example_validation_and_summary,
            example_stress_testing,
        ):
            run_example()

        print("\n" + rule)
        print("All examples completed successfully!")
    except Exception as exc:
        # Top-level boundary: surface the message, keep the traceback.
        print(f"\nError running examples: {exc}")
        raise
# Run the examples only when executed as a script, not when imported.
if __name__ == "__main__":
    main()