diff --git a/Dockerfile-webservice b/Dockerfile-webservice index 076ae29..c17620c 100644 --- a/Dockerfile-webservice +++ b/Dockerfile-webservice @@ -1,4 +1,4 @@ -FROM tiangolo/uvicorn-gunicorn-fastapi:python3.7 +FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8 COPY ./requirements.txt /tmp/ diff --git a/requirements.txt b/requirements.txt index 371ac0e..4af963f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,8 @@ fastapi -h5py>=3 +h5py numpy pandas passlib Pillow -pydantic -pyscicat==0.2.6 -pytz -suitcase-jsonl -tzlocal -zmq \ No newline at end of file +pyscicat @ git+https://github.com/dylanmcreynolds/pyscicat@small_fixes +pytz \ No newline at end of file diff --git a/splash_ingest/ingestors/ingest_tomo832.py b/splash_ingest/ingestors/ingest_tomo832.py index e4ee7bc..562601c 100644 --- a/splash_ingest/ingestors/ingest_tomo832.py +++ b/splash_ingest/ingestors/ingest_tomo832.py @@ -8,6 +8,7 @@ from pyscicat.client import ScicatClient from pyscicat.model import ( Attachment, + CreateDatasetOrigDatablockDto, Datablock, DataFile, RawDataset, @@ -90,7 +91,7 @@ def upload_raw_dataset( dataset = RawDataset( owner=scicat_metadata.get("/measurement/sample/experiment/pi") or "Unknown", - contactEmail=scicat_metadata.get("/measurement/sample/experimenter/email") + contactEmail=clean_email(scicat_metadata.get("/measurement/sample/experimenter/email")) or "Unknown", creationLocation=scicat_metadata.get("/measurement/instrument/instrument_name") or "Unknown", @@ -110,9 +111,9 @@ def upload_raw_dataset( description=description, keywords=appended_keywords, creationTime=file_mod_time, - **ownable.dict(), + **ownable.model_dump(), ) - dataset_id = scicat_client.upload_raw_dataset(dataset) + dataset_id = scicat_client.upload_new_dataset(dataset) return dataset_id @@ -132,16 +133,14 @@ def create_data_files(file_path: Path) -> List[DataFile]: def upload_data_block( scicat_client: ScicatClient, file_path: Path, dataset_id: str, ownable: Ownable ) -> Datablock: - 
"Creates a datablock of fits files" + "Creates a datablock of files" datafiles = create_data_files(file_path) - datablock = Datablock( - datasetId=dataset_id, + datablock = CreateDatasetOrigDatablockDto( size=get_file_size(file_path), - dataFileList=datafiles, - **ownable.dict(), + dataFileList=datafiles ) - scicat_client.upload_datablock(datablock) + return scicat_client.upload_dataset_origdatablock(dataset_id, datablock) def upload_attachment( @@ -155,7 +154,7 @@ def upload_attachment( datasetId=dataset_id, thumbnail=encoded_thumnbnail, caption="raw image", - **ownable.dict(), + **ownable.model_dump(), ) scicat_client.upload_attachment(attachment) @@ -165,7 +164,7 @@ def get_file_size(file_path: Path) -> int: def get_file_mod_time(file_path: Path) -> str: - return str(datetime.fromtimestamp(file_path.lstat().st_mtime)) + return datetime.fromtimestamp(file_path.lstat().st_mtime).isoformat() def _extract_fields(file, keys, issues) -> Dict[str, Any]: @@ -217,6 +216,11 @@ def _get_data_sample(file, sample_size=10): return data_sample +def clean_email(email: str): + if email: + return email.replace(" ", "").replace(",", "").replace("'", "") + return None + scicat_metadata_keys = [ "/measurement/instrument/instrument_name", @@ -291,3 +295,26 @@ def _get_data_sample(file, sample_size=10): "/measurement/instrument/monochromator/setup/turret2", "/measurement/instrument/monochromator/setup/turret1", ] + + +if __name__ == "__main__": + import os + import sys + + # Manual smoke test. Connection settings come from the environment and + # paths from the command line, so that no password or machine-specific + # path is committed to the repository. + if len(sys.argv) != 3: + sys.exit("usage: ingest_tomo832.py FILE.h5 THUMBNAIL_DIR") + ingest( + ScicatClient( + os.environ.get("SCICAT_URL", "http://localhost:3000/api/v3"), + None, + os.environ.get("SCICAT_USER", "ingestor"), + os.environ["SCICAT_PASSWORD"], + ), + "admin", + sys.argv[1], + Path(sys.argv[2]), + [], + ) \ No newline at end of file diff --git a/splash_ingest/ingestors/scicat_utils.py b/splash_ingest/ingestors/scicat_utils.py index bbdb826..f77a5fe 100644 --- a/splash_ingest/ingestors/scicat_utils.py +++ b/splash_ingest/ingestors/scicat_utils.py @@ -47,7 +47,7 @@ def 
calculate_access_controls(username, beamline, proposal) -> Dict: def build_search_terms(sample_name): - """exctract search terms from sample name to provide something pleasing to search on""" + """extract search terms from sample name to provide something pleasing to search on""" terms = re.split("[^a-zA-Z0-9]", sample_name) description = [term.lower() for term in terms if len(term) > 0] return " ".join(description) diff --git a/splash_ingest/server/api.py b/splash_ingest/server/api.py index 3504b55..1cdb3e2 100644 --- a/splash_ingest/server/api.py +++ b/splash_ingest/server/api.py @@ -101,7 +101,7 @@ class CreateJobRequest(BaseModel): class CreateJobResponse(BaseModel): message: str = Field(description="return message") - job_id: Optional[str] = Field(description="uid of newly created job, if created") + job_id: Optional[str] = Field(None, description="uid of newly created job, if created") @app.post( diff --git a/splash_ingest/server/model.py b/splash_ingest/server/model.py index 4ac4d23..d2de20e 100644 --- a/splash_ingest/server/model.py +++ b/splash_ingest/server/model.py @@ -30,9 +30,9 @@ class JobStatus(str, Enum): class StatusItem(BaseModel): time: datetime status: JobStatus - log: Optional[str] + log: Optional[str] = None submitter: str - issues: Optional[List[Issue]] + issues: Optional[List[Issue]] = None class Job(BaseModel): @@ -43,9 +43,9 @@ class Job(BaseModel): document_path: str status: JobStatus = None mapping_id: Optional[str] = None - submitter: Optional[str] + submitter: Optional[str] = None status_history: Optional[List[StatusItem]] = [] - ingest_types: Optional[List[IngestType]] + ingest_types: Optional[List[IngestType]] = None class Entity(BaseModel): diff --git a/splash_ingest/tests/test_ingest_832tomo.py b/splash_ingest/tests/test_ingest_832tomo.py new file mode 100644 index 0000000..2680f57 --- /dev/null +++ b/splash_ingest/tests/test_ingest_832tomo.py @@ -0,0 +1,8 @@ +from splash_ingest.ingestors import ingest_tomo832 + + + +def 
test_clean_email(): + assert ingest_tomo832.clean_email(" 'slartibartfast@magrathea.gov' ") == "slartibartfast@magrathea.gov" + assert ingest_tomo832.clean_email("slartibartfast@magrathea.gov") == "slartibartfast@magrathea.gov" + assert ingest_tomo832.clean_email(None) is None \ No newline at end of file