1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121 | def getMongoDB(**context):
token = context.get("ti").xcom_pull(key="token")
response = requests.get(
url=f"{dRoW_api_end_url}/api/module/document-export/airflow/workflow/64ba0dc1ef64f30c95e70223?export_type=0",
headers={
"x-access-token": f"Bearer {token}",
"ICWPxAccessKey": "nd@201907ICWP_[1AG:4UdI){n=b~"
})
RISC_Data = json.loads(response.text)
Mapping= {
"Date of Inspection:" : "a3_date_time",
"Follow up Summary": "c_summary_of_follow_up_actions",
}
saftey_cats=[
"1. 進出途徑 Access and Egress:",
"2. 一般事項 General",
"3. 高空作業 Working at Heigh:",
"4. 起重機械及起重裝置Lifting Appliances & Lifting Gear:",
"5. 電力 Electricity:",
"6. 泥土工程 Earthwork:",
"7. 機器 Machinery:",
"8. 防火措施 Fire Preventions:",
"9. 健康 Health:",
"10. 個人防護設備 Personal Protective Equipment:",
"11. 密閉空間 Confined Space:",
"12. 化學物品:",
"13. 福利設施:",
]
host = 'drowdatewarehouse.crlwwhgepgi7.ap-east-1.rds.amazonaws.com'
# User name of the database server
dbUserName = 'dRowAdmin'
# Password for the database user
dbUserPassword = 'drowsuper'
# Name of the database
database = 'drowDateWareHouse'
# Character set
charSet = "utf8mb4"
port = "5432"
conn_string = ('postgres://' +
dbUserName + ':' +
dbUserPassword +
'@' + host + ':' + port +
'/' + database)
db = create_engine(conn_string)
conn = db.connect()
df = pd.DataFrame()
with conn as conn:
for x in RISC_Data:
df_nested_list = json_normalize(x['data'])
df2 = df_nested_list.reindex(columns=Mapping.keys())
if len(x['ApproveLogSummary']) > 0:
request_data = [data for data in x['ApproveLogSummary'] if data.get('statusName')=="B : Checked by RSS"]
if len(request_data) > 0 and 'from' in request_data[-1]:
df2['sup_rep_signed_date'] = request_data[len(request_data)-1]['from']
else:
df2['sup_rep_signed_date'] = None
if len(request_data) > 0 and 'to' in request_data[-1]:
df2['contractor_rep_signed_date'] = request_data[len(request_data)-1]['to']
else:
df2['contractor_rep_signed_date'] = None
else:
df2['sup_rep_signed_date'] = None
df2['contractor_rep_signed_date'] = None
if (len(x['data']['Follow up Summary']) > 0):
total_late_retification = 0
for summaryData in x['data']['Follow up Summary']:
if ("Agreed Due Date for Completion" in summaryData and "Agreed Due Date for Completion" in summaryData and not (summaryData["Agreed Due Date for Completion"]!='') and (not (summaryData["Date Completed"]!='')) and (summaryData["Agreed Due Date for Completion"].astype('datetime64[ns]') < summaryData["Date Completed"].astype('datetime64[ns]')).bool()):
total_late_retification += 1
df2['total_late_retification'] = total_late_retification
else:
total_late_retification = 0
if (not df2['contractor_rep_signed_date'].isnull().bool() and not df2['Date of Inspection:'].isnull().bool()):
df2['days_complete'] = (((df2['contractor_rep_signed_date'].astype('datetime64[ns]') -
df2['Date of Inspection:'].astype('datetime64[ns]'))/ np.timedelta64(1, 'h'))/24).round(2)
if df2['days_complete'].isnull().bool() or df2['days_complete'].lt(0).bool():
df2['days_complete'] = 0
else:
df2['days_complete'] = None
df4=pd.DataFrame()
for saftey_cat in saftey_cats:
df3=df2.copy()
complete = 0
incomplete = 0
if not df2['sup_rep_signed_date'].isnull().bool():
if (len(x['data'][saftey_cat]) > 0):
for record in x['data'][saftey_cat]:
if record[saftey_cat.split(" ")[0] +' Result'] != '':
complete += 1
else:
if (len(x['data'][saftey_cat]) > 0):
for record in x['data'][saftey_cat]:
if record[saftey_cat.split(" ")[0] +' Result'] != '':
incomplete += 1
df3['saftey_cat'] = saftey_cat
df3['saftey_cat' + '_' + 'complete'] = complete
df3['saftey_cat' + '_' + 'incomplete'] = incomplete
df4 = df4.append(df3)
df2=df2.append(df4)
df = df.append(df2)
df.rename(columns=Mapping, inplace=True)
df['sup_rep_signed_date']=df['sup_rep_signed_date'].apply(pd.to_datetime)
df['contractor_rep_signed_date']=df['contractor_rep_signed_date'].apply(pd.to_datetime)
df['a3_date_time']=df['a3_date_time'].apply(pd.to_datetime)
# Remove all rows with column 'safety_cat' is null
df = df[df['saftey_cat'].notnull()]
df.columns = df.columns.str.replace(' ', '_').str.replace('.', '').str.replace('(', '_').str.replace(')', '').str.replace('%', 'percent').str.replace('/', '_')
df.drop(['c_summary_of_follow_up_actions'], axis=1, inplace=True)
df.to_sql('safety_walk_cv202302', con=conn, if_exists='replace', index= False)
|