Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile-webservice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.7
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8


COPY ./requirements.txt /tmp/
Expand Down
10 changes: 3 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
fastapi
h5py>=3
h5py
numpy
pandas
passlib
Pillow
pydantic
pyscicat==0.2.6
pytz
suitcase-jsonl
tzlocal
zmq
pyscicat @ git+https://github.com/dylanmcreynolds/pyscicat@small_fixes
pytz
41 changes: 30 additions & 11 deletions splash_ingest/ingestors/ingest_tomo832.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pyscicat.client import ScicatClient
from pyscicat.model import (
Attachment,
CreateDatasetOrigDatablockDto,
Datablock,
DataFile,
RawDataset,
Expand Down Expand Up @@ -90,7 +91,7 @@ def upload_raw_dataset(

dataset = RawDataset(
owner=scicat_metadata.get("/measurement/sample/experiment/pi") or "Unknown",
contactEmail=scicat_metadata.get("/measurement/sample/experimenter/email")
contactEmail=clean_email(scicat_metadata.get("/measurement/sample/experimenter/email"))
or "Unknown",
creationLocation=scicat_metadata.get("/measurement/instrument/instrument_name")
or "Unknown",
Expand All @@ -110,9 +111,9 @@ def upload_raw_dataset(
description=description,
keywords=appended_keywords,
creationTime=file_mod_time,
**ownable.dict(),
**ownable.model_dump(),
)
dataset_id = scicat_client.upload_raw_dataset(dataset)
dataset_id = scicat_client.upload_new_dataset(dataset)
return dataset_id


Expand All @@ -132,16 +133,14 @@ def create_data_files(file_path: Path) -> List[DataFile]:
def upload_data_block(
    scicat_client: ScicatClient, file_path: Path, dataset_id: str, ownable: Ownable
) -> Datablock:
    """Create an original datablock for ``file_path`` and upload it to a dataset.

    Args:
        scicat_client: Client used to perform the upload.
        file_path: File to describe; passed to ``create_data_files`` and
            ``get_file_size``.
        dataset_id: Identifier of the dataset the datablock is uploaded under.
        ownable: Not used by this function; kept in the signature so existing
            callers keep working.

    Returns:
        Whatever ``scicat_client.upload_dataset_origdatablock`` returns.
    """
    # NOTE(review): the ``-> Datablock`` annotation predates the switch to
    # ``upload_dataset_origdatablock`` -- confirm the client's real return type.
    datafiles = create_data_files(file_path)
    datablock = CreateDatasetOrigDatablockDto(
        size=get_file_size(file_path),
        dataFileList=datafiles,
    )
    return scicat_client.upload_dataset_origdatablock(dataset_id, datablock)


def upload_attachment(
Expand All @@ -155,7 +154,7 @@ def upload_attachment(
datasetId=dataset_id,
thumbnail=encoded_thumnbnail,
caption="raw image",
**ownable.dict(),
**ownable.model_dump(),
)
scicat_client.upload_attachment(attachment)

Expand All @@ -165,7 +164,7 @@ def get_file_size(file_path: Path) -> int:


def get_file_mod_time(file_path: Path) -> str:
    """Return the file's last-modification time as an ISO 8601 string.

    ``lstat`` is used, so for a symlink the link's own mtime is reported
    rather than its target's. The timestamp is converted with
    ``datetime.fromtimestamp`` without a ``tz`` argument, so the result is a
    naive local time (the string carries no UTC offset).
    """
    modified = datetime.fromtimestamp(file_path.lstat().st_mtime)
    return modified.isoformat()


def _extract_fields(file, keys, issues) -> Dict[str, Any]:
Expand Down Expand Up @@ -217,6 +216,11 @@ def _get_data_sample(file, sample_size=10):

return data_sample

def clean_email(email: str):
    """Strip stray whitespace, commas and single quotes from an email string.

    Args:
        email: Raw value to clean; ``None`` or an empty string is accepted.

    Returns:
        The cleaned string, or ``None`` when the input is falsy or nothing is
        left after cleaning, so callers can rely on ``clean_email(x) or default``.
    """
    if not email:
        return None
    # split() with no separator drops every kind of whitespace (tabs and
    # newlines too), not just the plain space character.
    cleaned = "".join(email.split()).replace(",", "").replace("'", "")
    return cleaned or None


scicat_metadata_keys = [
"/measurement/instrument/instrument_name",
Expand Down Expand Up @@ -291,3 +295,18 @@ def _get_data_sample(file, sample_size=10):
"/measurement/instrument/monochromator/setup/turret2",
"/measurement/instrument/monochromator/setup/turret1",
]


if __name__ == "__main__":
    # Ad-hoc developer run of ingest() against a locally running service.
    # NOTE(review): the endpoint, what look like credentials, and the file
    # paths are hard-coded for one workstation -- consider CLI args or
    # environment variables before relying on this entry point.
    local_client = ScicatClient(
        "http://localhost:3000/api/v3",
        None,
        "ingestor",
        "aman",
    )
    scan_file = "/Users/dylanmcreynolds/data/beamlines/8.3.2/20230927_165759_ddd.h5"
    thumbnail_dir = Path("/Users/dylanmcreynolds/data/beamlines/8.3.2/thumbnails")
    ingest(local_client, "admin", scan_file, thumbnail_dir, [])
2 changes: 1 addition & 1 deletion splash_ingest/ingestors/scicat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def calculate_access_controls(username, beamline, proposal) -> Dict:


def build_search_terms(sample_name):
    """Extract search terms from sample name to provide something pleasing to search on.

    The name is split on every character that is not an ASCII letter or
    digit; the non-empty pieces are lower-cased and joined with single spaces.
    """
    terms = re.split(r"[^a-zA-Z0-9]", sample_name)
    return " ".join(term.lower() for term in terms if term)
Expand Down
2 changes: 1 addition & 1 deletion splash_ingest/server/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class CreateJobRequest(BaseModel):

class CreateJobResponse(BaseModel):
    # Response model counterpart of CreateJobRequest.
    # (Documented with comments rather than a docstring so the generated
    # schema description stays unchanged.)
    message: str = Field(description="return message")
    # Explicit None default: an Optional annotation alone does not make the
    # field omittable under pydantic v2.
    job_id: Optional[str] = Field(
        None, description="uid of newly created job, if created"
    )


@app.post(
Expand Down
8 changes: 4 additions & 4 deletions splash_ingest/server/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ class JobStatus(str, Enum):
class StatusItem(BaseModel):
    # One status entry; Job.status_history holds a list of these.
    # (Documented with comments rather than a docstring so the generated
    # schema description stays unchanged.)
    time: datetime
    status: JobStatus
    # Optional fields carry an explicit None default so they may be omitted;
    # an Optional annotation alone does not do that under pydantic v2.
    log: Optional[str] = None
    submitter: str
    issues: Optional[List[Issue]] = None


class Job(BaseModel):
Expand All @@ -43,9 +43,9 @@ class Job(BaseModel):
document_path: str
status: JobStatus = None
mapping_id: Optional[str] = None
submitter: Optional[str]
submitter: Optional[str] = None
status_history: Optional[List[StatusItem]] = []
ingest_types: Optional[List[IngestType]]
ingest_types: Optional[List[IngestType]] = None


class Entity(BaseModel):
Expand Down
8 changes: 8 additions & 0 deletions splash_ingest/tests/test_ingest_832tomo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from splash_ingest.ingestors import ingest_tomo832



def test_clean_email():
    """clean_email strips quotes and spaces, keeps clean input, maps None to None."""
    expected = "slartibartfast@magrathea.gov"
    assert ingest_tomo832.clean_email(" 'slartibartfast@magrathea.gov' ") == expected
    assert ingest_tomo832.clean_email("slartibartfast@magrathea.gov") == expected
    # Identity check: `== None` can be satisfied by a custom __eq__ (E711).
    assert ingest_tomo832.clean_email(None) is None