Hi Jodogne, after some work and testing I managed to come up with two scripts: one to fix the instances already duplicated on a server, and one to filter out incoming C-STORE instances. So far the fix script scanned 2,350 instances in about 30 seconds on a local Orthanc install, but I need to clean up a server with about 13 million instances, and I believe around half of them are duplicates. Is there a better way to do this? I don't want to consume all of Orthanc's resources deleting the images, nor do I want to be waiting until next year for them all to be deleted. Is this possible with peering?

By the way, the server has been performing really well. We are using a PostgreSQL database on NVMe, and the storage is on 6x 6 TB drives in RAID-Z2; we get around 300-600 MB/s. It could be better, but I can't seem to find where the overhead is.
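For what it's worth, while the fix script runs I just keep an eye on the global instance count by polling the /statistics route (a quick sketch; the URL and credentials are the same test values as in the scripts below):

import time
import requests
from requests.auth import HTTPBasicAuth

baseUrl = "http://127.0.0.1:8042"
auth = HTTPBasicAuth('orthanc', 'orthanc')

# Print the remaining instance count once a minute while the cleanup runs
while True:
    stats = requests.get(baseUrl + "/statistics", auth=auth).json()
    print("instances currently stored: %d" % stats["CountInstances"])
    time.sleep(60)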
Here are the scripts:

Duplicated_Instances_fix.py
import datetime
import time
import requests
from requests.auth import HTTPBasicAuth

baseUrl = "http://127.0.0.1:8042"
auth = HTTPBasicAuth('orthanc', 'orthanc')

r_studies = requests.get(baseUrl + "/studies", auth=auth)

scanned_instances_count = 0
scanned_series_count = 0
scanned_studies_count = 0
deleted_instances_count = 0

start_time = datetime.datetime.now()
print(start_time)

for study in r_studies.json():
    study_start_time = datetime.datetime.now()
    print("removing duplicated instances for study: %s" % study)
    r_study = requests.get(baseUrl + "/studies/" + study, auth=auth)

    for series in r_study.json()["Series"]:
        deleted_instances = []
        series_start_time = datetime.datetime.now()
        r_series = requests.get(baseUrl + "/series/" + series, auth=auth)
        print("removing duplicated instances for series: ", series)
        print(datetime.datetime.now())

        for instance in r_series.json()["Instances"]:
            # Skip instances that were already removed while deduplicating a sibling
            if instance in deleted_instances:
                print("instance already deleted, skipping")
                continue

            time.sleep(0.001)  # small pause to avoid hammering the server
            r_instance = requests.get(baseUrl + "/instances/" + instance + "/simplified-tags", auth=auth)

            # Find every instance in this series sharing the same InstanceNumber
            data = {
                "Level": "Instance",
                "Query": {
                    "SeriesInstanceUID": r_instance.json()["SeriesInstanceUID"],
                    "InstanceNumber": r_instance.json()["InstanceNumber"]
                }
            }
            r_find = requests.post(baseUrl + "/tools/find", json=data, auth=auth)
            find = r_find.json()

            # Keep the first match, delete the rest
            while len(find) > 1:
                print("duplicates in series found")
                requests.delete(baseUrl + "/instances/" + find[-1], auth=auth)
                print("deleted duplicate: ", find[-1])
                deleted_instances.append(find[-1])
                deleted_instances_count += 1
                del find[-1]

            scanned_instances_count += 1

        scanned_series_count += 1
        series_end_time = datetime.datetime.now()
        series_delta_time = series_end_time - series_start_time
        print("Time to scan and delete duplicates on this series: ", series_delta_time)

    scanned_studies_count += 1
    study_end_time = datetime.datetime.now()
    study_delta = study_end_time - study_start_time
    print("Time to scan and delete duplicates on this study:", study_delta)

end_time = datetime.datetime.now()
total_time = end_time - start_time
print("Deleted a total of ", deleted_instances_count, " instances")
print("Scanned a total of ", scanned_studies_count, " studies, ", scanned_series_count, " series, ", scanned_instances_count, " instances")
print("in ", total_time)
Incoming_duplicate_check.py
import json
import orthanc

def FilterIncomingCStoreInstance(receivedDicom):
    instance_json = json.loads(receivedDicom.GetInstanceSimplifiedJson())

    # Look for an already-stored instance with the same SeriesInstanceUID and InstanceNumber
    data = {
        "Level": "Instance",
        "Query": {
            "SeriesInstanceUID": instance_json["SeriesInstanceUID"],
            "InstanceNumber": instance_json["InstanceNumber"]
        }
    }
    query = json.loads(orthanc.RestApiPost('/tools/find', json.dumps(data)))

    # If a match exists, delete it so the incoming instance replaces it
    if len(query) > 0:
        orthanc.RestApiDelete('/instances/%s' % query[0])
        print("deleted already stored instance: %s" % query[0])

    # 0 = success, accept the incoming instance
    return 0

orthanc.RegisterIncomingCStoreInstanceFilter(FilterIncomingCStoreInstance)
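One idea I had for speeding up the cleanup on the big server is to batch the deletions instead of issuing one DELETE per instance. This is only a rough sketch and assumes the /tools/bulk-delete route is available in the Orthanc version we run (I understand it was added in a fairly recent release); would something like this be a sensible direction?

import requests
from requests.auth import HTTPBasicAuth

baseUrl = "http://127.0.0.1:8042"
auth = HTTPBasicAuth('orthanc', 'orthanc')

def delete_duplicates_in_batch(duplicate_ids):
    # Ask Orthanc to delete a whole list of instances in a single call,
    # instead of one HTTP round trip per instance.
    data = {"Resources": duplicate_ids}
    r = requests.post(baseUrl + "/tools/bulk-delete", json=data, auth=auth)
    r.raise_for_status()

# Example: collect the duplicate Orthanc identifiers found by the scan,
# then delete them in chunks of 100.
duplicates = []  # filled by the scanning loop in Duplicated_Instances_fix.py
for i in range(0, len(duplicates), 100):
    delete_duplicates_in_batch(duplicates[i:i + 100])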