Source code for mdvtools.tests.test_multitext

from mdvtools.mdvproject import MDVProject
from tempfile import TemporaryDirectory
import pandas as pd



[docs]
def test_compliant_multitext():
    """Check that a string column reads back unchanged from an MDVProject.

    The same four-row frame is registered twice in a throwaway project: once
    as datasource "t" with a 'text' column, and once as datasource "mt" with
    a 'multitext' column using ";" as separator. For both, the values
    returned by ``get_column`` are compared element by element against the
    input column and must all match.
    """
    frame = pd.DataFrame({"test_data": ["a", "b", "a; b", ""]})
    expected = frame["test_data"]

    # todo: TempProject more general pattern for tests like this / staging uploads?
    with TemporaryDirectory() as project_dir:
        project = MDVProject(project_dir)

        # as of this writing, an error is routinely raised internally when 't'
        # is not already a datasource
        text_spec = {"name": "test_data", "datatype": "text"}
        project.add_datasource("t", frame, columns=[text_spec])
        text_mismatch = project.get_column("t", "test_data") != expected
        # guard against `!=` collapsing to a single bool instead of comparing
        # element by element
        assert not isinstance(text_mismatch, bool)
        assert not any(text_mismatch)

        # but the more serious issue is that it doesn't follow 'multitext'
        # control flow properly, and goes down a float32 path.
        multitext_spec = {
            "name": "test_data",
            "datatype": "multitext",
            "separator": ";",
        }
        project.add_datasource("mt", frame, columns=[multitext_spec])
        # "mt" was added second, so it is looked up at index 1
        stored_column = project.datasources[1]["columns"][0]
        assert stored_column["datatype"] == "multitext"
        multitext_mismatch = project.get_column("mt", "test_data") != expected
        assert not isinstance(multitext_mismatch, bool)
        assert not any(multitext_mismatch)

        # this method is implicated in not getting the right data type to send to the client
        # byte_data = p.get_byte_data([{ 'datasource': 'mt', 'column': 'test_data' }], None)  # <HDF5 dataset "__tags": shape (N,), type "<u2">
        # <u2 is the NumPy little-endian unsigned 2-byte integer datatype, seems right for text16
        # for 'text' <HDF5 dataset "__tags": shape (N,), type "|u1">
        # todo catch the error with __tags saved from frontend...


# Allow running this test directly as a script, without a test runner.
if __name__ == "__main__":
    test_compliant_multitext()