Commit 47734581 authored by Sven Lautenbach
Browse files

updated data. fixed problem with date in CTRI registered studies and extend...

Updated data. Fixed a problem with the date in CTRI-registered studies and extended the list of studies that needed a date update by 3 studies.
parent 4ab8b81c
......@@ -158,6 +158,14 @@ def changeDateFormat(aStr):
return (aStr)
def changeDateFormatCTRI(aStr):
    """Convert a day-month-year date string to year-month-day.

    A value is treated as day-first when its first "-" sits at index 2
    (i.e. after a two-character day). Anything else is returned unchanged,
    so the function is safe to apply to a whole column of mixed formats.

    Args:
        aStr: The date value to convert, e.g. "25-03-2020".

    Returns:
        The reordered string, e.g. "2020-03-25", or ``aStr`` unchanged when
        it is not a string, is not in day-first layout, or does not have
        at least three "-"-separated parts.
    """
    # Missing values (None / NaN) are not strings and have no .find();
    # hand them back untouched instead of raising AttributeError.
    if not isinstance(aStr, str):
        return aStr

    # Year-first strings such as "2020-03-25" have their first "-" at
    # index 4, so they fall through here unchanged.
    if aStr.find("-") != 2:
        return aStr

    theParts = aStr.split("-")
    # Guard against partial dates like "03-2020", which would otherwise
    # raise IndexError when the third part is accessed.
    if len(theParts) < 3:
        return aStr

    return theParts[2] + "-" + theParts[1] + "-" + theParts[0]
def run():
"""Run entire workflow."""
......@@ -170,19 +178,26 @@ def run():
# TODO: this is currently broken! You need to provide as csv file manually.
new_data_df = get_trials_df_from_url()
# remove wrong case NCT04256395
studies_to_delete = ["NCT04256395", "NCT04226157", "ISRCTN51287266", "NCT04331860"]
studies_to_delete = ["NCT04256395", "NCT04226157", "ISRCTN51287266", "NCT03042143", "NCT03891420", "EUCTR2019-002688-89-ES"]
for study in studies_to_delete:
new_data_df = new_data_df.drop(new_data_df[new_data_df['TrialID'] == study].index, axis=0)
# Some studies have been running for a while and have now been updated to incorporate COVID-19
# as well; for these, the suggestion by Markus Reis and Konstantin is to use the date in "Last Update Post" instead.
studies_updated = ["EUCTR2015-002340-14-NL", "NCT03331445", "NCT04061382", "NCT03680274", "NCT03808922"]
studies_updated = ["EUCTR2015-002340-14-NL", "NCT03331445", "NCT04061382", "NCT03680274", "NCT03808922", "NCT03042143"]
needsUpdate = new_data_df['TrialID'].isin(studies_updated)
new_data_df["Date registration"] = np.where(needsUpdate,
new_data_df["Last Refreshed on"],
new_data_df["Date registration"])
isSlashDateFormat = new_data_df["Date registration"].str.find("/") == 2
# change date for specific cases
# it seems that CTRI is reporting as day-month-year
isCTRI = new_data_df["TrialID"].str.find("CTRI") == 0
new_data_df["Date registration"] = np.where(
new_data_df["Date registration"].apply(changeDateFormatCTRI),
new_data_df["Date registration"]
new_data_df["Date registration"] = new_data_df["Date registration"].apply(changeDateFormat)
new_data_df["Date registration"] = pd.to_datetime(new_data_df["Date registration"])
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment