from flask import (
Flask,
# Blueprint,
render_template,
request,
make_response,
send_file,
Response,
jsonify,
current_app
)
import webbrowser
import mimetypes
import json
import sys
import re
from werkzeug.security import safe_join
from mdvtools.websocket import mdv_socketio
from mdvtools.mdvproject import MDVProject
from mdvtools.project_router import (
ProjectBlueprint as Blueprint,
SingleProjectShim
)
import os
import pandas as pd
from typing import Optional
from datetime import datetime, timedelta
import threading
import scanpy as sc
from mdvtools.conversions import convert_scanpy_to_mdv
# consider using flask_cors...
# flask send_file can't always cope with relative paths
# sets the cwd to the python path for some reason
[docs]
def _send_file(f):
if not os.path.isabs(f):
f = os.path.join(os.getcwd(), f)
return send_file(f)
[docs]
def get_range(file_name, range_header):
file = open(file_name, "rb")
size = sys.getsizeof(file_name)
byte1, byte2 = 0, None
m = re.search(r"(\d+)-(\d*)", range_header)
if not m:
raise Exception("Invalid Range Header")
g = m.groups()
if g[0]:
byte1 = int(g[0])
if g[1]:
byte2 = int(g[1])
length = size - byte1
if byte2 is not None:
length = byte2 - byte1 + 1
file.seek(byte1)
data = file.read(length)
rv = Response(
data, 206, mimetype=mimetypes.guess_type(file_name)[0], direct_passthrough=True
)
rv.headers.add(
"Content-Range", "bytes {0}-{1}/{2}".format(byte1, byte1 + length - 1, size)
)
rv.headers.add("Accept-Ranges", "bytes")
file.close()
return rv
[docs]
def create_app(
project: MDVProject,
open_browser=True,
port=5050,
websocket=False,
app: Optional[Flask] = None,
backend_db=False,
):
if app is None:
route = ""
# route = "/project/" + project.name # for testing new API with simple app...
app = Flask(__name__)
print(f"created Flask {app}")
# add headers to allow web workers
app.after_request(add_safe_headers)
project_bp = SingleProjectShim(app)
multi_project = False
# nb, may make this default to False again soon.
### 'MEW' in Unity is using IWeb PostMessage, not WebSockets.
### but this will be used for local testing in short-term, and potentially other things later.
if websocket:
mdv_socketio(app)
else:
## nb - previous use of flask.Blueprint was not allowing new projects at runtime
## we substitute this with our own ProjectBlueprint class, which is a drop-in replacement
## but we should add more tests to ensure it behaves as expected...
# add routes for this project to existing app
# set the route prefix to the project name, derived from the dir name.
# this is to allow multiple projects to be served from the same server.
multi_project = True
route = "/project/" + project.id + "/"
if backend_db:
from mdvtools.project_router import ProjectBlueprint_v2 as Blueprint_v2
print("backend_db is True")
project_bp = Blueprint_v2(project.id, __name__, url_prefix=route)
else:
project_bp = Blueprint(project.id, __name__, url_prefix=route)
# if route in routes:
# raise Exception(
# "Route already exists - can't have two projects with the same name"
# )
routes.add(route)
@project_bp.route("/")
def project_index():
print("recieved request to project_index")
# the backend page currently needs to be different to workaround a server config issue
# some requests were being downgraded to http, which caused problems with the backend
# but if we always add the header it messes up localhost development.
# todo if necessary, apply equivalent change to index.html / any other pages we might have
return render_template("page.html", route=route, backend=backend_db)
@project_bp.route("/<file>.b")
def get_binary_file(file):
# should this now b '.gz'?
file_name = safe_join(project.dir, file + ".b")
range_header = request.headers.get("Range", None)
return get_range(file_name, range_header)
# duplicate of above, but for .gz files in case that's needed.
# (there was some reason for changing to this, but I can't fully remember the status
# so maybe better to support both for now)
@project_bp.route("/<file>.gz")
def get_binary_file_gz(file):
file_name = safe_join(project.dir, file + ".gz")
range_header = request.headers.get("Range", None)
return get_range(file_name, range_header)
@project_bp.route("/<file>.json")
def get_json_file(file: str):
if project.dir is None:
return "Project directory not found", 404
path = safe_join(project.dir, file + ".json")
if path is None or not os.path.exists(path):
return "File not found", 404
return _send_file(path)
# gets the raw byte data and packages it in the correct response
@project_bp.route("/get_data", methods=["POST"])
def get_data():
try:
data = request.json
if not data or "columns" not in data or "data_source" not in data:
raise Exception(
"Request must contain JSON with 'columns' and 'data_source'"
)
bytes_ = project.get_byte_data(data["columns"], data["data_source"])
response = make_response(bytes_)
response.headers.set("Content-Type", "application/octet-stream")
return response
except Exception as e:
print(e)
return "Problem handling request", 400
# images contained in the project
@project_bp.route("/images/<path:path>")
def images(path):
try:
return _send_file(project.get_image(path))
except Exception:
return _send_file(safe_join(project.imagefolder, path))
# All the project's metadata
@project_bp.route("/get_configs", methods=["GET", "POST"])
def get_configs():
return jsonify(project.get_configs())
# gets a particular view
@project_bp.route("/get_view", methods=["POST"])
def get_view():
data = request.json
if not data or "view" not in data:
return "Request must contain JSON with 'view'", 400
return jsonify(project.get_view(data["view"]))
# get any custom row data
@project_bp.route("/get_row_data", methods=["POST"])
def get_row_data():
req = request.json
if req is None:
return json.dumps({"data": None})
path = safe_join(
project.dir, "rowdata", req["datasource"], f"{req['index']}.json"
)
if path is None or not os.path.exists(path):
return json.dumps({"data": None})
with open(path) as f:
if f is None:
return json.dumps({"data": None})
return f.read()
# get arbitrary data
@project_bp.route("/get_binary_data", methods=["POST"])
def get_binary_data():
req = request.json
try:
if req is None or "datasource" not in req or "name" not in req:
return "Request must contain JSON with 'datasource' and 'name'", 400
if project.dir is None or not os.path.exists(project.dir):
return "Project directory not found", 404
path = safe_join(
project.dir, "binarydata", req["datasource"], f"{req['name']}.gz"
)
if path is None or not os.path.exists(path):
return "Binary data not found", 404
with open(path, "rb") as f:
data = f.read()
except Exception:
# data='' # satisfy type checker - was None, haven't tested if this is better or worse.
# probably better to return an error.
return "Problem getting binary data", 500
return data
# only the specified region of track files (bam,bigbed,tabix)
# needs to be returned
@project_bp.route("/tracks/<path:path>")
def send_track(path):
file_name = safe_join(project.trackfolder, path)
range_header = request.headers.get("Range", None)
if not range_header:
return _send_file(file_name)
return get_range(file_name, range_header)
@project_bp.route("/save_state", access_level='editable', methods=["POST"])
def save_data():
success = True
try:
state = request.json
project.save_state(state)
except Exception:
success = False
return jsonify({"success": success})
# Utility Functions
def create_temp_folder(base_path):
"""
Create a temporary folder with a timestamp-based name.
Creates a new directory within the specified base path using a timestamp-based naming
convention for temporary AnnData file storage. The folder name follows the pattern
'temp_anndata_YYYYMMDD_HHMMSS'.
Args:
base_path (str): The parent directory where the temporary folder will be created
Returns:
str: Absolute path to the created temporary folder
Notes:
- The folder is created with exist_ok=True to handle potential race conditions
- Timestamp format used: YYYYMMDD_HHMMSS (e.g., temp_anndata_20250129_143022)
- This function is typically used in conjunction with cleanup_folder() for
temporary file handling during AnnData uploads
Example:
>>> temp_path = create_temp_folder('/path/to/project')
>>> print(temp_path)
'/path/to/project/temp_anndata_20250129_143022'
"""
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
temp_folder_name = f"temp_anndata_{timestamp}"
temp_folder_path = os.path.join(base_path, temp_folder_name)
os.makedirs(temp_folder_path, exist_ok=True)
return temp_folder_path
def cleanup_folder(folder_path):
"""
Recursively delete a folder and all its contents.
Safely removes all files within the specified folder and then removes the folder
itself. Any errors during the cleanup process are logged but do not raise exceptions
to the caller.
Args:
folder_path (str): Absolute path to the folder that needs to be cleaned up
Notes:
- First removes all files within the folder, then removes the empty folder
- Handles non-existent paths safely
- Logs errors to current_app.logger but does not propagate exceptions
- Only removes files (not subdirectories) within the specified folder
- Typically used to clean up temporary folders created by create_temp_folder()
Example:
>>> cleanup_folder('/path/to/temp/folder')
# Folder and its contents are deleted if they exist
# Any errors are logged but not raised
"""
try:
if os.path.exists(folder_path):
for file in os.listdir(folder_path):
file_path = os.path.join(folder_path, file)
if os.path.isfile(file_path):
os.remove(file_path)
os.rmdir(folder_path)
except Exception as e:
current_app.logger.error(f"Error cleaning up folder {folder_path}: {e}")
@project_bp.route("/add_anndata", access_level='editable', methods=["POST"])
def add_anndata():
"""
Upload and process an AnnData file (.h5ad) for the project.
This endpoint handles the upload ofa single file in AnnData format,
converts it to MDV format, and saves it to the project directory. If a file already exists,
it will be saved to a temporary location that expires after 5 minutes.
HTTP Methods:
POST
Request Parameters:
file (FileStorage): The .h5ad file to be uploaded (multipart/form-data)
Returns:
tuple: A tuple containing:
- JSON response with status and message
- HTTP status code
Success Response (200):
{
'status': 'success',
'message': 'Anndata added successfully'
}
Error Responses:
400:
- No file in request:
{
'status': 'error',
'message': 'No file part in the request'
}
- No file selected:
{
'status': 'error',
'message': 'No file selected'
}
- Invalid file type:
{
'status': 'error',
'message': 'Invalid file type. Only .h5ad files are allowed'
}
409:
- File exists conflict:
{
'status': 'conflict',
'message': 'File already exists',
'temp_folder': '/path/to/temp/folder'
}
Notes:
- The function creates a temporary folder for conflict resolution that is automatically
cleaned up after 5 minutes (300 seconds)
- Only .h5ad file extensions are accepted
- Successful upload will trigger conversion from Scanpy AnnData to MDV format
"""
try:
if 'file' not in request.files:
return jsonify({'status': 'error', 'message': 'No file part in the request'}), 400
file = request.files['file']
if not file or not file.filename:
return jsonify({'status': 'error', 'message': 'No file selected'}), 400
if not file.filename.endswith('.h5ad'):
return jsonify({'status': 'error', 'message': 'Invalid file type. Only .h5ad files are allowed'}), 400
target_path = os.path.join(project.dir, "anndata.h5ad")
if os.path.exists(target_path):
temp_folder = create_temp_folder(project.dir)
temp_path = os.path.join(temp_folder, "anndata.h5ad")
file.save(temp_path)
threading.Timer(300, cleanup_folder, args=[temp_folder]).start()
return jsonify({
'status': 'conflict',
'message': 'File already exists',
'temp_folder': temp_folder
}), 409
# Save file and process
file.save(target_path)
anndata = sc.read(target_path)
convert_scanpy_to_mdv(project.dir, anndata)
return jsonify({'status': 'success', 'message': 'Anndata added successfully'}), 200
except Exception as e:
current_app.logger.error(f"Unexpected error: {str(e)}")
return jsonify({'status': 'error', 'message': str(e)}), 500
@project_bp.route("/combine_anndata", access_level='editable', methods=["PATCH"])
def combine_anndata():
"""
Combine a temporary AnnData file with an existing project file.
This endpoint handles the resolution of file conflicts when uploading AnnData files.
It can either combine the temporary file with the existing project file (adding a label prefix
to distinguish the data) or cancel the operation and clean up the temporary files.
HTTP Methods:
PATCH
Request Parameters:
temp_folder (str): Path to the temporary folder containing the AnnData file
combine (bool): Whether to combine the files (true) or cancel the operation (false)
label (str): Prefix label to distinguish the new data when combining files
Returns:
tuple: A tuple containing:
- JSON response with status and message
- HTTP status code
Success Responses:
200:
- Successful merge:
{
'status': 'success',
'message': 'File merged successfully'
}
- Operation cancelled:
{
'status': 'success',
'message': 'Operation cancelled'
}
Error Responses:
400:
- Missing label:
{
'status': 'error',
'message': 'Label field not found'
}
- Missing temporary file:
{
'status': 'error',
'message': 'Temporary file not found'
}
408:
- Timeout:
{
'status': 'error',
'message': 'Request timed out, please try uploading again'
}
500:
- Label generation error:
{
'status': 'error',
'message': '<specific ValueError message>'
}
- Other errors:
{
'status': 'error',
'message': '<specific error message>'
}
Notes:
- This endpoint is typically called after a conflict is detected in the /add_anndata endpoint
- The temporary folder is cleaned up regardless of whether the operation succeeds or fails
- When combining files, the new data is prefixed with the provided label followed by an underscore
- The combined file maintains the original filename 'anndata.h5ad' in the project directory
"""
try:
temp_folder = request.form.get('temp_folder')
combine = request.form.get('combine') == 'true'
label = request.form.get('label')
if not temp_folder or not os.path.exists(temp_folder):
return jsonify({'status': 'error', 'message': 'Request timed out, please try uploading again'}), 408
if not combine:
cleanup_folder(temp_folder)
return jsonify({'status': 'success', 'message': 'Operation cancelled'}), 200
if not label:
cleanup_folder(temp_folder)
return jsonify({'status': 'error', 'message': 'Label field not found'}), 400
temp_path = os.path.join(temp_folder, "anndata.h5ad")
if not os.path.exists(temp_path):
return jsonify({'status': 'error', 'message': 'Temporary file not found'}), 400
# Process and combine the file
new_anndata = sc.read(temp_path)
convert_scanpy_to_mdv(project.dir, new_anndata, delete_existing=False, label=f"{label}_")
new_anndata.write(os.path.join(project.dir, "anndata.h5ad")) # type: ignore - str output from os.path.join should be reliably PathLike
cleanup_folder(temp_folder)
return jsonify({'status': 'success', 'message': 'File merged successfully'}), 200
except ValueError as ve:
current_app.logger.error(f"Label generation error: {str(ve)}")
return jsonify({'status': 'error', 'message': str(ve)}), 500
except Exception as e:
current_app.logger.error(f"Unexpected error: {str(e)}")
return jsonify({'status': 'error', 'message': str(e)}), 500
@project_bp.route("/add_or_update_image_datasource", access_level='editable', methods=["POST"])
def add_or_update_image_datasource():
try:
# Check if request has a file part
if 'file' not in request.files:
return "No file part in the request", 400
# Get the file from the request
file = request.files['file']
# Get the text fields from the request form
datasource_name = request.form.get('datasourceName') # ""
tiff_metadata = request.form.get('tiffMetadata')
# Validate the presence of required fields
if not file or not tiff_metadata:
return "Missing file or tiffMetadata", 400
# If tiff_metadata is sent as JSON string, deserialize it
try:
tiff_metadata = json.loads(tiff_metadata)
except Exception as e:
return jsonify({"status": "error", "message": f"Invalid JSON format for tiffMetadata: {e}"}), 400
# Call the method to add or update the image datasource
view_name = project.add_or_update_image_datasource(tiff_metadata, datasource_name, file)
# If no exception is raised, the operation was successful. let the client know which view will show the image.
print(f">>> notify client that image datasource updated and file uploaded successfully, view: {view_name}")
return jsonify({"status": "success", "message": "Image datasource updated and file uploaded successfully", "view": view_name}), 200
except Exception as e:
return jsonify({"status": "error", "message": str(e)}), 500
@project_bp.route("/add_datasource", access_level='editable', methods=["POST"])
def add_datasource():
# we shouldn't be passing "backend" in request.form, the logic should only be on server
#if backend:
# response = add_datasource_backend(project)
# return response
if (
"permission" not in project.state
or not project.state["permission"] == "edit"
):
return "Project is read-only", 400
success = True
try:
name = request.form["name"]
print("In server.py add_datasource")
if not name:
return "Request must contain 'name'", 400
# xxx - not how column metadata should be passed, todo fix
# cols = (
# request.form["columns"].split(",")
# if "columns" in request.form
# else None
# )
# I'm not sure we really want to add to default view by default - could mess up existing views in a project with multiple datasources
# but probably ok for now (famous last words)
view = request.form["view"] if "view" in request.form else "default"
# replace = True if "replace" in request.form else False
replace = False
if not replace and name in [ds["name"] for ds in project.datasources]:
return (
f"Datasource '{name}' already exists, and 'replace' was not set in request",
400,
)
if "file" not in request.files:
return "No 'file' provided in request form data", 400
file = request.files["file"]
supplied_only = True if "supplied_only" in request.form else False
if not file or file.mimetype != "text/csv":
return "File must be a CSV", 400
file.seek(0)
# will this work? can we return progress to the client?
df = pd.read_csv(file.stream)
print("In server.py add_datasource- df created")
print("df is ready, calling project.add_datasource")
project.add_datasource(
#project.id,
name,
df,
# cols,
add_to_view=view,
supplied_columns_only=supplied_only,
replace_data=replace
)
print("added df - project.add_datasource completed")
except Exception as e:
# success = False
return str(e), 400
metadata = project.get_datasource_metadata(name)
return jsonify({"success": success, "metadata": metadata})
if open_browser:
webbrowser.open(f"http://localhost:{port}/{route}")
if not multi_project:
if not isinstance(app, Flask):
raise Exception(
"assert: serving single project should have made a Flask app instance by now"
)
# user_reloader=False, allows the server to work within jupyter
app.run(host="0.0.0.0", port=port, debug=True, use_reloader=False)