Dependency | Reason |
---|---|
Dagrun Running | Task instance's dagrun was not in the 'running' state but in the state 'success'. |
Task Instance State | Task is in the 'success' state which is not a valid state for execution. The task must be cleared in order to be run. |
def getMongoDB(**context):
    """Download the dRoW document export and load it into PostgreSQL.

    Pulls the API token from XCom (key ``"token"``), requests the
    document export of workflow ``66ff882a373b4986e8a2f09b``, flattens the
    ``data`` payload of every record into one DataFrame, normalises the
    column names and replaces the ``cv202211_payment_excel`` table with
    the result.

    Args:
        **context: Airflow task context; only ``context["ti"]`` is read.

    Raises:
        requests.HTTPError: If the export endpoint returns an error status.
        ValueError: If no record yields any data; raised before the
            database is touched, so the existing table is left intact.
        KeyError: If one of the columns to be dropped is missing.
    """
    token = context.get("ti").xcom_pull(key="token")
    response = requests.get(
        url=f"{dRoW_api_end_url}/api/module/document-export/airflow/workflow/66ff882a373b4986e8a2f09b?export_type=0",
        headers={
            "x-access-token": f"Bearer {token}",
        },
    )
    # Fail loudly on an HTTP error instead of JSON-decoding an error page.
    response.raise_for_status()
    records = json.loads(response.text)

    # Collect the per-record frames and concatenate once: DataFrame.append
    # copies the whole frame on every call and no longer exists in pandas 2.
    frames = []
    for record in records:
        try:
            payload = record['data']
            # .keys() keeps non-dict payloads on the error path below.
            if not payload.keys():
                continue
            frames.append(json_normalize(payload))
        except Exception as e:
            # Best effort: one malformed record must not abort the export.
            print(f"Error processing data: {e}")
            continue

    if not frames:
        raise ValueError(
            "dRoW export contained no usable records; "
            "cv202211_payment_excel was left unchanged"
        )
    df = pd.concat(frames, sort=False)

    # Literal, single-pass clean-up of the column names. Index.str.replace
    # is avoided because its regex default differs between pandas versions
    # and '.', '(' and ')' are regex metacharacters.
    column_cleanup = str.maketrans({
        ' ': '_',
        '.': '',
        '(': '_',
        ')': '',
        '%': 'percent',
        '/': '_',
    })
    df.columns = [str(name).translate(column_cleanup) for name in df.columns]
    df = df.drop(
        ['Payment_Summary', "Contractor's_Application", "PM's_Assessment"],
        axis=1,
    )

    # NOTE(review): credentials are hard-coded in source control; they
    # should come from an Airflow Connection or environment variables.
    host = 'drowdatewarehouse.crlwwhgepgi7.ap-east-1.rds.amazonaws.com'
    dbUserName = 'dRowAdmin'
    dbUserPassword = 'drowsuper'
    database = 'drowDateWareHouse'
    port = "5432"
    # 'postgresql' is the dialect name accepted by every SQLAlchemy
    # version; the 'postgres' alias was removed in SQLAlchemy 1.4.
    conn_string = (
        f"postgresql://{dbUserName}:{dbUserPassword}@{host}:{port}/{database}"
    )
    db = create_engine(conn_string)
    try:
        # engine.begin() commits on success and rolls back on error, so the
        # drop-and-recreate done by if_exists='replace' is applied atomically.
        with db.begin() as conn:
            df.to_sql(
                'cv202211_payment_excel',
                con=conn,
                if_exists='replace',
                index=False,
            )
    finally:
        # Release the pooled connections held by this short-lived engine.
        db.dispose()
|
Attribute | Value |
---|---|
dag_id | cv202211_payment_excel |
duration | 16.358355 |
end_date | 2025-04-26 08:02:03.394991+00:00 |
execution_date | 2025-04-26T04:00:00+00:00 |
executor_config | {} |
generate_command | <function TaskInstance.generate_command at 0x7f152f9bf320> |
hostname | 63fbafbc3109 |
is_premature | False |
job_id | 142662 |
key | ('cv202211_payment_excel', 'getDataAndSendToPSQL', <Pendulum [2025-04-26T04:00:00+00:00]>, 2) |
log | <Logger airflow.task (INFO)> |
log_filepath | /usr/local/airflow/logs/cv202211_payment_excel/getDataAndSendToPSQL/2025-04-26T04:00:00+00:00.log |
log_url | http://localhost:8080/admin/airflow/log?execution_date=2025-04-26T04%3A00%3A00%2B00%3A00&task_id=getDataAndSendToPSQL&dag_id=cv202211_payment_excel |
logger | <Logger airflow.task (INFO)> |
mark_success_url | http://localhost:8080/success?task_id=getDataAndSendToPSQL&dag_id=cv202211_payment_excel&execution_date=2025-04-26T04%3A00%3A00%2B00%3A00&upstream=false&downstream=false |
max_tries | 1 |
metadata | MetaData(bind=None) |
next_try_number | 2 |
operator | PythonOperator |
pid | 2843058 |
pool | default_pool |
prev_attempted_tries | 1 |
previous_execution_date_success | 2025-04-26 00:00:00+00:00 |
previous_start_date_success | 2025-04-26 04:01:14.961773+00:00 |
previous_ti | <TaskInstance: cv202211_payment_excel.getDataAndSendToPSQL 2025-04-26 00:00:00+00:00 [success]> |
previous_ti_success | <TaskInstance: cv202211_payment_excel.getDataAndSendToPSQL 2025-04-26 00:00:00+00:00 [success]> |
priority_weight | 1 |
queue | default |
queued_dttm | 2025-04-26 08:01:42.472149+00:00 |
raw | False |
run_as_user | None |
start_date | 2025-04-26 08:01:47.036636+00:00 |
state | success |
task | <Task(PythonOperator): getDataAndSendToPSQL> |
task_id | getDataAndSendToPSQL |
test_mode | False |
try_number | 2 |
unixname | airflow |
Attribute | Value |
---|---|
dag | <DAG: cv202211_payment_excel> |
dag_id | cv202211_payment_excel |
depends_on_past | False |
deps | {<TIDep(Not In Retry Period)>, <TIDep(Trigger Rule)>, <TIDep(Previous Dagrun State)>} |
do_xcom_push | True |
downstream_list | [] |
downstream_task_ids | set() |
None | |
email_on_failure | True |
email_on_retry | True |
end_date | None |
execution_timeout | None |
executor_config | {} |
extra_links | [] |
global_operator_extra_link_dict | {} |
inlets | [] |
lineage_data | None |
log | <Logger airflow.task.operators (INFO)> |
logger | <Logger airflow.task.operators (INFO)> |
max_retry_delay | None |
on_failure_callback | None |
on_retry_callback | None |
on_success_callback | None |
op_args | [] |
op_kwargs | {'name': 'Dylan'} |
operator_extra_link_dict | {} |
operator_extra_links | () |
outlets | [] |
owner | airflow |
params | {} |
pool | default_pool |
priority_weight | 1 |
priority_weight_total | 1 |
provide_context | True |
queue | default |
resources | None |
retries | 1 |
retry_delay | 0:05:00 |
retry_exponential_backoff | False |
run_as_user | None |
schedule_interval | 0 0,4,8,11,16 * * * |
shallow_copy_attrs | ('python_callable', 'op_kwargs') |
sla | None |
start_date | 2022-10-24T00:00:00+00:00 |
subdag | None |
task_concurrency | None |
task_id | getDataAndSendToPSQL |
task_type | PythonOperator |
template_ext | [] |
template_fields | ('templates_dict', 'op_args', 'op_kwargs') |
templates_dict | None |
trigger_rule | all_success |
ui_color | #ffefeb |
ui_fgcolor | #000 |
upstream_list | [<Task(PythonOperator): getDrowToken>] |
upstream_task_ids | {'getDrowToken'} |
wait_for_downstream | False |
weight_rule | downstream |