From f94e14fc89dfe7491430ffafc17cae7b44a34659 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Ciesi=C3=B3=C5=82ka?=
Date: Sat, 30 Dec 2023 21:28:07 +0100
Subject: [PATCH] Added Pig scripts for extracting data from 'system' table

---
 .../generate_columns_single_dataset.pig       |  9 +++++++++
 .../generate_columns_whole_mocca.pig          |  8 ++++++++
 .../system_table/join_single_dataset.pig      | 21 +++++++++++++++++++++
 pig_scripts/system_table/join_whole_mocca.pig | 21 +++++++++++++++++++++
 4 files changed, 59 insertions(+)
 create mode 100644 pig_scripts/system_table/generate_columns_single_dataset.pig
 create mode 100644 pig_scripts/system_table/generate_columns_whole_mocca.pig
 create mode 100644 pig_scripts/system_table/join_single_dataset.pig
 create mode 100644 pig_scripts/system_table/join_whole_mocca.pig

diff --git a/pig_scripts/system_table/generate_columns_single_dataset.pig b/pig_scripts/system_table/generate_columns_single_dataset.pig
new file mode 100644
index 0000000..1a58724
--- /dev/null
+++ b/pig_scripts/system_table/generate_columns_single_dataset.pig
@@ -0,0 +1,9 @@
+-- Select a fixed set of columns from the 'system' table of a single dataset.
+-- Change the dataset id (in the LOAD query and in the STORE name) if needed.
+raw_sys = LOAD 'datasets="mocca 01f4f7bfadd9ee5ab2568d73ee7b9dda" tables="system"' USING BeansTable();
+-- dsid and moccaid are both computed from tbid; the other columns pass through unchanged.
+sys = FOREACH raw_sys GENERATE
+    DSID(tbid) AS dsid,
+    DSPARAMSTRING(DSID(tbid), 'moccaid') AS moccaid,
+    tphys, smt, r1, rcob, rchut2, rhob, vc;
+STORE sys INTO 'name="sys_01f4f7bfadd9ee5ab2568d73ee7b9dda"' USING BeansTable();
diff --git a/pig_scripts/system_table/generate_columns_whole_mocca.pig b/pig_scripts/system_table/generate_columns_whole_mocca.pig
new file mode 100644
index 0000000..0ea700a
--- /dev/null
+++ b/pig_scripts/system_table/generate_columns_whole_mocca.pig
@@ -0,0 +1,8 @@
+-- Select a fixed set of columns from the 'system' table; the dataset query names no dataset id.
+raw_sys = LOAD 'datasets="mocca " tables="system"' USING BeansTable();
+-- dsid and moccaid are both computed from tbid; the other columns pass through unchanged.
+sys = FOREACH raw_sys GENERATE
+    DSID(tbid) AS dsid,
+    DSPARAMSTRING(DSID(tbid), 'moccaid') AS moccaid,
+    tphys, smt, r1, rcob, rchut2, rhob, vc;
+STORE sys INTO 'name="sys_all_datasets_data"' USING BeansTable();
diff --git a/pig_scripts/system_table/join_single_dataset.pig b/pig_scripts/system_table/join_single_dataset.pig
new file mode 100644
index 0000000..053bd23
--- /dev/null
+++ b/pig_scripts/system_table/join_single_dataset.pig
@@ -0,0 +1,21 @@
+-- Join the extracted 'system' columns of a single dataset with the collapse table.
+sys = LOAD 'datasets="Testing Notebook" tables="sys_01f4f7bfadd9ee5ab2568d73ee7b9dda"' USING BeansTable();
+collapse = LOAD 'datasets="core collapse and blue stragglers" tables="collapseMyr_vs_bssdynBssEvol"' USING BeansTable();
+-- Rows are matched on sys.moccaid = collapse.dsid.
+joined = JOIN sys BY moccaid, collapse BY dsid;
+-- collapsed is 0 when tphys < collapseMyr and 1 otherwise.
+with_collapse = FOREACH joined GENERATE
+    sys::tbid AS tbid,
+    sys::dsid AS dsid,
+    sys::moccaid AS moccaid,
+    sys::tphys AS tphys,
+    sys::smt AS smt,
+    sys::r1 AS r1,
+    sys::rcob AS rcob,
+    sys::rchut2 AS rchut2,
+    sys::rhob AS rhob,
+    sys::vc AS vc,
+    collapse::collapseMyr AS collapseMyr,
+    collapse::confidence AS confidence,
+    (sys::tphys < collapse::collapseMyr ? 0 : 1) AS collapsed:int;
+STORE with_collapse INTO 'name="sys_01f4f7bfadd9ee5ab2568d73ee7b9dda_with_collapse"' USING BeansTable();
diff --git a/pig_scripts/system_table/join_whole_mocca.pig b/pig_scripts/system_table/join_whole_mocca.pig
new file mode 100644
index 0000000..310938a
--- /dev/null
+++ b/pig_scripts/system_table/join_whole_mocca.pig
@@ -0,0 +1,21 @@
+-- Join the extracted 'system' columns stored as sys_all_datasets_data with the collapse table.
+sys = LOAD 'datasets="Testing Notebook" tables="sys_all_datasets_data"' USING BeansTable();
+collapse = LOAD 'datasets="core collapse and blue stragglers" tables="collapseMyr_vs_bssdynBssEvol"' USING BeansTable();
+-- Rows are matched on sys.moccaid = collapse.dsid.
+joined = JOIN sys BY moccaid, collapse BY dsid;
+-- collapsed is 0 when tphys < collapseMyr and 1 otherwise.
+with_collapse = FOREACH joined GENERATE
+    sys::tbid AS tbid,
+    sys::dsid AS dsid,
+    sys::moccaid AS moccaid,
+    sys::tphys AS tphys,
+    sys::smt AS smt,
+    sys::r1 AS r1,
+    sys::rcob AS rcob,
+    sys::rchut2 AS rchut2,
+    sys::rhob AS rhob,
+    sys::vc AS vc,
+    collapse::collapseMyr AS collapseMyr,
+    collapse::confidence AS confidence,
+    (sys::tphys < collapse::collapseMyr ? 0 : 1) AS collapsed:int;
+STORE with_collapse INTO 'name="sys_all_datasets_with_collapse"' USING BeansTable();