Update esp to automation
M55551 authored and mxqura committed Oct 21, 2018
1 parent 870d0cb commit eb48cda
Showing 58 changed files with 520 additions and 431 deletions.
17 changes: 17 additions & 0 deletions .project
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
    <name>ibis_foss</name>
    <comment></comment>
    <projects>
    </projects>
    <buildSpec>
        <buildCommand>
            <name>org.python.pydev.PyDevBuilder</name>
            <arguments>
            </arguments>
        </buildCommand>
    </buildSpec>
    <natures>
        <nature>org.python.pydev.pythonNature</nature>
    </natures>
</projectDescription>
8 changes: 8 additions & 0 deletions .pydevproject
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?><pydev_project>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/${PROJECT_DIR_NAME}</path>
</pydev_pathproperty>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>
7 changes: 7 additions & 0 deletions .settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,7 @@
eclipse.preferences.version=1
encoding//docs/help.md=UTF-8
encoding//docs/ibis_features.md=UTF-8
encoding//docs/property_file_update.md=UTF-8
encoding//docs/setup_ibis.md=UTF-8
encoding/README.md=UTF-8
encoding/qa1.log=UTF-8
14 changes: 7 additions & 7 deletions README.md
@@ -44,7 +44,7 @@ single configuration file, also called the "Request file".
Split by | Provides an automated split by for Teradata, SQL Server, and DB2 | Without the automated split by, on a per table basis, you need to find a column that enables parallel execution of an ingestion
Auto generating Oozie workflows | Automatically creates XML workflows | No manual XML creation
Generate non ingestion workflows through "building blocks" | Given hive and shell scripts, IBIS generates oozie workflow | Automate running any type of script in the Data Lake
Group tables based on schedule | Group workflows into subworkflows based on schedule | Tables with similar schedule can be kicked off using ESP by triggering one workflow.
Group tables based on schedule | Group workflows into subworkflows based on schedule | Tables with similar schedule can be kicked off using Automation by triggering one workflow.
Use Parquet | Store data in Parquet | Efficient storage + fast queries!
Follows Lambda Architecture | Storing data in the base layer, as immutable data
Allows for data export to any RDBMS | Allows to export data from hive to oracle, db2, sqlserver, mysql, Teradata | For generating reports based on exported data to RDBMS
@@ -65,7 +65,7 @@ Command --help would list the IBIS Functionalities

Under the covers, IBIS manages the information required to pull in data sources
into HDFS, including usernames, passwords, JDBC connection info, and also keeps
track of ESP ids, used for scheduling jobs in Production.
track of Automation ids, used for scheduling jobs in Production.


IBIS also has a shell that allows you to run your workflow once it's been created.
@@ -133,7 +133,7 @@ ibis-shell --submit-request <path to requestfile.txt> --env prod
[small, medium, heavy]
views:fake_view_im|fake_view_open <---- Views (optional)
check_column:TRANS_TIME <---- Column for incremental (optional)
esp_group:magic <---- used for grouping tables in esp (optional)
automation_group:magic <---- used for grouping tables in Automation (optional)
fetch_size:50000 <---- sqoop rows fetch size (optional)
hold:1 <---- workflow won't be generated (optional)
split_by:fake_nd_tablename_NUM <---- Used for sqoop import (recommended)
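
For illustration, a hypothetical request-file entry combining the optional fields above with the mandatory fields listed later in this document (all database, table, and column names are placeholders, and a real request file also carries connection details not shown in this excerpt):

```
source_database_name:fake_database
source_table_name:fake_client_tablename
db_env:int
views:fake_view_im|fake_view_open
check_column:TRANS_TIME
automation_group:magic
fetch_size:50000
hold:0
split_by:fake_nd_tablename_NUM
```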
@@ -198,9 +198,9 @@ ibis-shell --update-it-table <path to tables.txt> --env prod
[small, medium, heavy]
fetch_size: <---- No value given. Just ignores
hold:0
esp_appl_id:null <---- set null to empty the column value
automation_appl_id:null <---- set null to empty the column value
views:fake_view_im|fake_view_open <---- Pipe(|) separated values
esp_group:magic_table
automation_group:magic_table
check_column:fake_nd_tablename_NUM <---- Sqoop incremental column
source_database_name:fake_database (mandatory)
source_table_name:fake_client_tablename (mandatory)
@@ -218,14 +218,14 @@ ibis-shell --view --view-name <view_name> --db <name> --table <name> # Minimum

#### Create a workflow based on schedule/frequency
```
ibis-shell --gen-esp-workflows <frequency> # frequency choices['weekly', 'monthly', 'quarterly', or 'biweekly']
ibis-shell --gen-automation-workflows <frequency> # frequency choices['weekly', 'monthly', 'quarterly', or 'biweekly']
```

----------

##### Create workflows with subworkflows based on one or more filters
```
ibis-shell --gen-esp-workflow schedule=None database=None jdbc_source=None
ibis-shell --gen-automation-workflow schedule=None database=None jdbc_source=None
# schedule choices - none, daily, biweekly, weekly, fortnightly, monthly, quarterly
jdbc_source choices - oracle, db2, teradata, sqlserver
```
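
As a hedged illustration of the filter syntax above, a run that groups weekly Oracle-sourced tables might look like the following (the environment name and filter values are placeholders, and filters not used are assumed to be omittable):
```
ibis-shell --env prod --gen-automation-workflow schedule=weekly jdbc_source=oracle
```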
3 changes: 2 additions & 1 deletion config_parser.py
@@ -34,7 +34,8 @@ def parses_config():
        print 'Error: Given section, {} not found!'.format(section)
        sys.exit(1)

    return ''.join(map((lambda x: x[0]+'='+x[1]+' '), parser.items(section)))
    return ''.join(
        map((lambda x: x[0] + '=' + x[1] + ' '), parser.items(section)))


def main():
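For context, a minimal self-contained sketch of what `parses_config` appears to do, based only on the fragment above (the function signature, file name, and section name are assumptions; the project targets Python 2.7, so the standard-library `ConfigParser` and print statement are used):

```python
import sys
from ConfigParser import SafeConfigParser


def parses_config(config_file, section):
    """Return the options of one config section as a 'key=value ' string."""
    parser = SafeConfigParser()
    parser.read(config_file)
    if not parser.has_section(section):
        print 'Error: Given section, {} not found!'.format(section)
        sys.exit(1)
    # Joins every (option, value) pair into a single space-separated string.
    return ''.join(
        map((lambda x: x[0] + '=' + x[1] + ' '), parser.items(section)))


if __name__ == '__main__':
    # Hypothetical usage; the file and section names are placeholders.
    print parses_config('ibis.properties', 'Workflows')
```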
20 changes: 9 additions & 11 deletions docs/help.md
@@ -43,11 +43,11 @@ Python 2.7.8

[--where WHERE]

[--gen-esp-workflow GEN_ESP_WORKFLOW [GEN_ESP_WORKFLOW ...]]
[--gen-automation-workflow GEN_automation_WORKFLOW [GEN_automation_WORKFLOW ...]]

[--gen-esp-workflow-tables GEN_ESP_WORKFLOW_TABLES]
[--gen-automation-workflow-tables GEN_automation_WORKFLOW_TABLES]

[--gen-export-esp-workflow-tables GEN_EXPORT_ESP_WORKFLOW_TABLES]
[--gen-export-automation-workflow-tables GEN_EXPORT_automation_WORKFLOW_TABLES]

[--data-mask DATA_MASK]

@@ -57,7 +57,7 @@ Python 2.7.8

[--queue-name QUEUE_NAME]

[--esp-id ESP_ID] [--message MESSAGE]
[--automation-id automation_ID] [--message MESSAGE]

[--export] [--to TO] [--auth-test]

@@ -177,15 +177,15 @@ Python 2.7.8

--where WHERE Used to provide a where statement

--gen-esp-workflow GEN_ESP_WORKFLOW [GEN_ESP_WORKFLOW ...]
Create workflow(s) based on a list of ESP ids
--gen-automation-workflow GEN_automation_WORKFLOW [GEN_automation_WORKFLOW ...]
Create workflow(s) based on a list of automation ids
separated by spaces.

--gen-esp-workflow-tables GEN_ESP_WORKFLOW_TABLES
--gen-automation-workflow-tables GEN_automation_WORKFLOW_TABLES
Create workflow(s) based on a list of tables from
request file

--gen-export-esp-workflow-tables GEN_EXPORT_ESP_WORKFLOW_TABLES
--gen-export-automation-workflow-tables GEN_EXPORT_automation_WORKFLOW_TABLES
Create export workflow(s) based on a table from
request file
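
Hedged examples of how these flags might be invoked (the automation ids, file names, and environment are placeholders; exact argument forms may differ):

```
ibis-shell --env dev --gen-automation-workflow FAKED001 FAKED006
ibis-shell --env dev --gen-automation-workflow-tables request_file.txt
ibis-shell --env dev --gen-export-automation-workflow-tables export_request.txt
```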

@@ -203,7 +203,7 @@ Python 2.7.8
--queue-name QUEUE_NAME
Used for providing hadoop queue name

--esp-id ESP_ID esp-appl-id
--automation-id automation_ID automation-appl-id

--message MESSAGE Provide description for bmrs

@@ -328,8 +328,6 @@ Python 2.7.8

--ingest-version Get the ingest version used for the xml

--skip-profile Flag to activate podium profiling

--kite-ingest KITE_INGEST
Used to generate kite-ingest workflow

22 changes: 11 additions & 11 deletions docs/ibis_features.md
@@ -31,7 +31,7 @@ Provide respective parameters in request file for ingestion to Hadoop and export
Split by | Provides an automated split by for Teradata, SQL Server, and DB2 | Without the automated split by, on a per table basis, you need to find a column that enables parallel execution of an ingestion
Auto generating Oozie workflows | Automatically creates XML workflows | No manual XML creation
Generate non ingestion workflows through "building blocks" | Given hive and shell scripts, IBIS generates oozie workflow | Automate running any type of script in the Data Lake
Group tables based on schedule | Group workflows into subworkflows based on schedule | Tables with similar schedule can be kicked off using ESP by triggering one workflow.
Group tables based on schedule | Group workflows into subworkflows based on schedule | Tables with similar schedule can be kicked off using automation by triggering one workflow.
Use Parquet | Store data in Parquet | Efficient storage + fast queries!
Follows Lambda Architecture | Storing data in the base layer, as immutable data
Allows for data export to any RDBMS |
@@ -48,7 +48,7 @@ Command --help would list the IBIS Functionalities

Under the covers, IBIS manages the information required to pull in data sources
into HDFS, including usernames, passwords, JDBC connection info, and also keeps
track of ESP ids, used for scheduling jobs.
track of automation ids, used for scheduling jobs.


IBIS also has a shell that allows you to run your workflow once it's been created.
@@ -115,7 +115,7 @@ db_env:int
[small, medium, heavy]
views:fake_view_im|fake_view_open <---- Views (optional)
check_column:TRANS_TIME <---- Column for incremental (optional)
esp_group:magic <---- used for grouping tables in esp (optional)
automation_group:magic <---- used for grouping tables in automation (optional)
fetch_size:50000 <---- sqoop rows fetch size (optional)
hold:1 <---- workflow won't be generated (optional)
split_by:fake_nd_tablename_NUM <---- Used for sqoop import (recommended)
@@ -172,9 +172,9 @@ db_env:int
[small, medium, heavy]
fetch_size: <---- No value given. Just ignores
hold:0
esp_appl_id:null <---- set null to empty the column value
automation_appl_id:null <---- set null to empty the column value
views:fake_view_im|fake_view_open <---- Pipe(|) separated values
esp_group:magic_table
automation_group:magic_table
check_column:fake_nd_tablename_NUM <---- Sqoop incremental column
source_database_name:fake_database (mandatory)
source_table_name:fake_client_tablename (mandatory)
@@ -189,12 +189,12 @@ db_env:int
----------
#### Create a workflow based on schedule/frequency
```ibis-shell --gen-esp-workflows <frequency> # frequency choices['weekly', 'monthly', 'quarterly', or 'biweekly']```
```ibis-shell --gen-automation-workflows <frequency> # frequency choices['weekly', 'monthly', 'quarterly', or 'biweekly']```
----------
##### Create workflows with subworkflows based on one or more filters
```ibis-shell --gen-esp-workflow schedule=None database=None jdbc_source=None
```ibis-shell --gen-automation-workflow schedule=None database=None jdbc_source=None
# schedule choices - none, daily, biweekly, weekly, fortnightly, monthly, quarterly
jdbc_source choices - oracle, db2, teradata, sqlserver```
@@ -208,7 +208,7 @@ db_env:int
### PERF environment - automating loads in a lower Hadoop env for testing
###### Example
DB_name.Table_name is scheduled via ESP to refresh every Monday at 6pm.
DB_name.Table_name is scheduled via automation to refresh every Monday at 6pm.
Team A wants it every Monday
Team B wants it every Month
@@ -218,9 +218,9 @@ Team B will be able to run its APPL / JOB to pull the data every month
Because the data will just land in the IBIS base domain, nothing will affect the team's current PERF data. The APPL / JOBs that teams will need to run will be created based on the "views" column in IBIS. It is up to the team that wants the data to run those APPL / JOBs and bring the data into their sandbox space.
##### Create ESP workflow from request file for PERF
##### Create automation workflow from request file for PERF
```ibis-shell --env <env> --gen-esp-workflow-tables <requestFiles>```
```ibis-shell --env <env> --gen-automation-workflow-tables <requestFiles>```
----------
@@ -230,7 +230,7 @@ Because the data will just land in the IBIS base domain, nothing will effect the
----------
##### Update the Activate flag and frequency for perf esp run
##### Update the Activate flag and frequency for perf automation run
```ibis-shell --env <env> --update-activator --table <table> --teamname <db_name> --activate <yes/no> --frequency <run_frequency>```
10 changes: 5 additions & 5 deletions docs/property_file_update.md
@@ -22,7 +22,7 @@ Following are the list of properties to be updated
|it_table_export=ibis.prod_it_table_export|N|Created by the ibis setup shell. Please match the table name with setup shell. Holds the entry for each table to be exported|
|staging_database=fake_staging_datbase|N|Created by the ibis setup shell. Please match the table name with setup shell. Temporarily Holds the data load of each table |
|checks_balances=ibis.checks_balances|N|Created by the ibis setup shell. Please match the table name with setup shell. Holds the entry for each table load|
|esp_ids_table=ibis.esp_ids|N|Created by the ibis setup shell. Please match the table name with setup shell. Stores the Appl ID and frequency details|
|automation_ids_table=ibis.automation_ids|N|Created by the ibis setup shell. Please match the table name with setup shell. Stores the Appl ID and frequency details|
|staging_it_table=ibis.staging_it_table|N|Created by the ibis setup shell. Please match the table name with setup shell. Stores tables to be ingested through schedule|
|prod_it_table=ibis.prod_it_table|N|Created by the ibis setup shell. Please match the table name with setup shell. Holds the entry for each table to be ingested|
|queue_name=ingestion|Y|Update with HDFS queue name for loading the table|
@@ -49,7 +49,7 @@ Following are the list of properties to be updated
|start_workflow=start.xml.mako|N| workflow start template|
|end_workflow=end.xml.mako|N|workflow end template|
|export_end_workflow=export_end.xml.mako|N|export workflow end template|
|korn_shell=esp_template.ksh.mako|N|workflow KSH template|
|korn_shell=automation_template.ksh.mako|N|workflow KSH template|
|job_properties=prod_job.properties|N|Job properties template|
|sub_workflow=subworkflow.xml|N|Sub workflow template|
|export_to_td=export_to_td.xml|N|teradata workflow template|
@@ -69,10 +69,10 @@ Following are the list of properties to be updated
|hql_views_workspace=/user/dev/oozie/workspaces/ibis/hql|N|HDFS location for HQl to be deployed||
|shell_workspace=/user/dev/oozie/workspaces/shell-adhoc|N|HDFS location for shells to be deployed||
|impala_workspace=/user/dev/oozie/workspaces/impala-adhoc|N|HDFS location for impala-scripts to be deployed||
|**[ESP_ID]**|||
|big_data=FAKE|Y|Update the ESP ID's first 4 letters, for example "GDBD" in Appl ID : GDBDD006|
|**[automation_ID]**|||
|big_data=FAKE|Y|Update the automation ID's first 4 letters, for example "GDBD" in Appl ID : GDBDD006|
|frequencies_map=daily:D,biweekly:B,weekly:W,fortnightly:F,monthly:M,quarterly:Q,adhoc:A,onetime:O,mul-appls:X,yearly:Y|N|The first letter of the frequency is used in Appl ID creation; it appears as the frequency letter in the Appl ID|
|environment_map=6|Y|Is an optional update. It's the last digit of the Appl ID, used to identify the env in which the ESP Appl is running; in this case '6' will be suffixed in the Appl ID : GDBDD006|
|environment_map=6|Y|Is an optional update. It's the last digit of the Appl ID, used to identify the env in which the automation Appl is running; in this case '6' will be suffixed in the Appl ID : GDBDD006|
|from_branch=prod|N||
|**[Other]**|||
|allowed_frequencies=000:none,101:daily,011:biweekly,100:weekly,110:fortnightly,010:monthly,001:quarterly,111:yearly|N|All allowed frequencies for scheduling workflows|
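
To make the Appl ID convention in the table above concrete, here is an illustrative sketch (not part of the repository) that composes an ID from the pieces described: the 4-letter prefix, the frequency letter from frequencies_map, and the environment digit. The middle digits are assumed here to be a zero-padded sequence number.

```python
FREQUENCIES_MAP = {
    'daily': 'D', 'biweekly': 'B', 'weekly': 'W', 'fortnightly': 'F',
    'monthly': 'M', 'quarterly': 'Q', 'adhoc': 'A', 'onetime': 'O',
    'mul-appls': 'X', 'yearly': 'Y'}


def build_appl_id(prefix, frequency, sequence, env_digit):
    """Compose an automation Appl ID such as GDBDD006."""
    return '{0}{1}{2:02d}{3}'.format(
        prefix, FREQUENCIES_MAP[frequency], sequence, env_digit)


# 'GDBD' + 'D' (daily) + '00' (assumed sequence) + '6' (env) -> GDBDD006
print build_appl_id('GDBD', 'daily', 0, 6)
```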