From 619ac7c8cc928d35c7f601082921875f952524f1 Mon Sep 17 00:00:00 2001 From: Dylan McReynolds Date: Sun, 1 Oct 2023 17:35:50 -0700 Subject: [PATCH 1/3] update pydantic, pyscicat --- requirements.txt | 21 +++++------ splash_ingest/ingestors/ingest_tomo832.py | 41 ++++++++++++++++------ splash_ingest/ingestors/scicat_utils.py | 2 +- splash_ingest/server/api.py | 2 +- splash_ingest/server/model.py | 8 ++--- splash_ingest/tests/test_ingest_832tomo.py | 8 +++++ 6 files changed, 53 insertions(+), 29 deletions(-) create mode 100644 splash_ingest/tests/test_ingest_832tomo.py diff --git a/requirements.txt b/requirements.txt index 371ac0e..db24c33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,9 @@ -fastapi -h5py>=3 -numpy -pandas -passlib -Pillow -pydantic -pyscicat==0.2.6 -pytz -suitcase-jsonl -tzlocal -zmq \ No newline at end of file +fastapi==0.103.2 +h5py==3.9.0 +numpy==1.26.0 +pandas==2.1.1 +passlib==1.7.4 +Pillow==10.0.1 +pydantic==2.4.2 +pyscicat @ git+https://github.com/dylanmcreynolds/pyscicat/tree/small_fixes@new_scicat +pytz==2023.3.post1 \ No newline at end of file diff --git a/splash_ingest/ingestors/ingest_tomo832.py b/splash_ingest/ingestors/ingest_tomo832.py index e4ee7bc..562601c 100644 --- a/splash_ingest/ingestors/ingest_tomo832.py +++ b/splash_ingest/ingestors/ingest_tomo832.py @@ -8,6 +8,7 @@ from pyscicat.client import ScicatClient from pyscicat.model import ( Attachment, + CreateDatasetOrigDatablockDto, Datablock, DataFile, RawDataset, @@ -90,7 +91,7 @@ def upload_raw_dataset( dataset = RawDataset( owner=scicat_metadata.get("/measurement/sample/experiment/pi") or "Unknown", - contactEmail=scicat_metadata.get("/measurement/sample/experimenter/email") + contactEmail=clean_email(scicat_metadata.get("/measurement/sample/experimenter/email")) or "Unknown", creationLocation=scicat_metadata.get("/measurement/instrument/instrument_name") or "Unknown", @@ -110,9 +111,9 @@ def upload_raw_dataset( description=description, keywords=appended_keywords, creationTime=file_mod_time, - **ownable.dict(), + **ownable.model_dump(), ) - dataset_id = scicat_client.upload_raw_dataset(dataset) + dataset_id = scicat_client.upload_new_dataset(dataset) return dataset_id @@ -132,16 +133,14 @@ def create_data_files(file_path: Path) -> List[DataFile]: def upload_data_block( scicat_client: ScicatClient, file_path: Path, dataset_id: str, ownable: Ownable ) -> Datablock: - "Creates a datablock of fits files" + "Creates a datablock of files" datafiles = create_data_files(file_path) - datablock = Datablock( - datasetId=dataset_id, + datablock = CreateDatasetOrigDatablockDto( size=get_file_size(file_path), - dataFileList=datafiles, - **ownable.dict(), + dataFileList=datafiles ) - scicat_client.upload_datablock(datablock) + return scicat_client.upload_dataset_origdatablock(dataset_id, datablock) def upload_attachment( @@ -155,7 +154,7 @@ def upload_attachment( datasetId=dataset_id, thumbnail=encoded_thumnbnail, caption="raw image", - **ownable.dict(), + **ownable.model_dump(), ) scicat_client.upload_attachment(attachment) @@ -165,7 +164,7 @@ def get_file_size(file_path: Path) -> int: def get_file_mod_time(file_path: Path) -> str: - return str(datetime.fromtimestamp(file_path.lstat().st_mtime)) + return datetime.fromtimestamp(file_path.lstat().st_mtime).isoformat() def _extract_fields(file, keys, issues) -> Dict[str, Any]: @@ -217,6 +216,11 @@ def _get_data_sample(file, sample_size=10): return data_sample +def clean_email(email: str): + if email: + return email.replace(" ", "").replace(",", "").replace("'", "") + return None + scicat_metadata_keys = [ "/measurement/instrument/instrument_name", @@ -291,3 +295,18 @@ def _get_data_sample(file, sample_size=10): "/measurement/instrument/monochromator/setup/turret2", "/measurement/instrument/monochromator/setup/turret1", ] + + +if __name__ == "__main__": + ingest( + ScicatClient( + "http://localhost:3000/api/v3", + None, + "ingestor", + "aman" + ), + "admin", + "/Users/dylanmcreynolds/data/beamlines/8.3.2/20230927_165759_ddd.h5", + Path("/Users/dylanmcreynolds/data/beamlines/8.3.2/thumbnails"), + [], + ) \ No newline at end of file diff --git a/splash_ingest/ingestors/scicat_utils.py b/splash_ingest/ingestors/scicat_utils.py index bbdb826..f77a5fe 100644 --- a/splash_ingest/ingestors/scicat_utils.py +++ b/splash_ingest/ingestors/scicat_utils.py @@ -47,7 +47,7 @@ def calculate_access_controls(username, beamline, proposal) -> Dict: def build_search_terms(sample_name): - """exctract search terms from sample name to provide something pleasing to search on""" + """extract search terms from sample name to provide something pleasing to search on""" terms = re.split("[^a-zA-Z0-9]", sample_name) description = [term.lower() for term in terms if len(term) > 0] return " ".join(description) diff --git a/splash_ingest/server/api.py b/splash_ingest/server/api.py index 3504b55..1cdb3e2 100644 --- a/splash_ingest/server/api.py +++ b/splash_ingest/server/api.py @@ -101,7 +101,7 @@ class CreateJobRequest(BaseModel): class CreateJobResponse(BaseModel): message: str = Field(description="return message") - job_id: Optional[str] = Field(description="uid of newly created job, if created") + job_id: Optional[str] = Field(None, description="uid of newly created job, if created") @app.post( diff --git a/splash_ingest/server/model.py b/splash_ingest/server/model.py index 4ac4d23..d2de20e 100644 --- a/splash_ingest/server/model.py +++ b/splash_ingest/server/model.py @@ -30,9 +30,9 @@ class JobStatus(str, Enum): class StatusItem(BaseModel): time: datetime status: JobStatus - log: Optional[str] + log: Optional[str] = None submitter: str - issues: Optional[List[Issue]] + issues: Optional[List[Issue]] = None class Job(BaseModel): @@ -43,9 +43,9 @@ class Job(BaseModel): document_path: str status: JobStatus = None mapping_id: Optional[str] = None - submitter: Optional[str] + submitter: Optional[str] = None status_history: Optional[List[StatusItem]] = [] - ingest_types: Optional[List[IngestType]] + ingest_types: Optional[List[IngestType]] = None class Entity(BaseModel): diff --git a/splash_ingest/tests/test_ingest_832tomo.py b/splash_ingest/tests/test_ingest_832tomo.py new file mode 100644 index 0000000..2680f57 --- /dev/null +++ b/splash_ingest/tests/test_ingest_832tomo.py @@ -0,0 +1,8 @@ +from splash_ingest.ingestors import ingest_tomo832 + + + +def test_clean_email(): + assert ingest_tomo832.clean_email(" 'slartibartfast@magrathea.gov' ") == "slartibartfast@magrathea.gov" + assert ingest_tomo832.clean_email("slartibartfast@magrathea.gov") == "slartibartfast@magrathea.gov" + assert ingest_tomo832.clean_email(None) == None \ No newline at end of file From cfbe2551be7479f476c701f3f2c249bc06a54dc8 Mon Sep 17 00:00:00 2001 From: Dylan McReynolds Date: Sun, 1 Oct 2023 17:43:41 -0700 Subject: [PATCH 2/3] fix github pin of pyscicat --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index db24c33..3748557 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,5 @@ pandas==2.1.1 passlib==1.7.4 Pillow==10.0.1 pydantic==2.4.2 -pyscicat @ git+https://github.com/dylanmcreynolds/pyscicat/tree/small_fixes@new_scicat +pyscicat @ git+https://github.com/dylanmcreynolds/pyscicat@small_fixes pytz==2023.3.post1 \ No newline at end of file From 5656cfd8e530aafd62e42904d5b669806f144d9b Mon Sep 17 00:00:00 2001 From: Dylan McReynolds Date: Sun, 1 Oct 2023 17:58:30 -0700 Subject: [PATCH 3/3] fix requirements --- Dockerfile-webservice | 2 +- requirements.txt | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Dockerfile-webservice b/Dockerfile-webservice index 076ae29..c17620c 100644 --- a/Dockerfile-webservice +++ b/Dockerfile-webservice @@ -1,4 +1,4 @@ -FROM tiangolo/uvicorn-gunicorn-fastapi:python3.7 +FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8 COPY ./requirements.txt /tmp/ diff --git a/requirements.txt b/requirements.txt index 3748557..4af963f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,8 @@ -fastapi==0.103.2 -h5py==3.9.0 -numpy==1.26.0 -pandas==2.1.1 -passlib==1.7.4 -Pillow==10.0.1 -pydantic==2.4.2 +fastapi +h5py +numpy +pandas +passlib +Pillow pyscicat @ git+https://github.com/dylanmcreynolds/pyscicat@small_fixes -pytz==2023.3.post1 \ No newline at end of file +pytz \ No newline at end of file