From a924fd5610b1080a9f14ab561a6d4c27af953997 Mon Sep 17 00:00:00 2001 From: lpinne Date: Wed, 15 May 2024 13:54:00 +0200 Subject: [PATCH] SAP-convergent-mediation-ha-setup-sle15-docinfo.xml SAP-convergent-mediation-ha-setup-sle15.adoc SAPNotes-convergent-mediation.adoc Var_SAP-convergent-mediation.adoc: minimal SP, admin procedures, test cases --- ...rgent-mediation-ha-setup-sle15-docinfo.xml | 2 +- ...P-convergent-mediation-ha-setup-sle15.adoc | 155 +++++++++++++++--- adoc/SAPNotes-convergent-mediation.adoc | 1 + adoc/Var_SAP-convergent-mediation.adoc | 26 +-- 4 files changed, 149 insertions(+), 35 deletions(-) diff --git a/adoc/SAP-convergent-mediation-ha-setup-sle15-docinfo.xml b/adoc/SAP-convergent-mediation-ha-setup-sle15-docinfo.xml index 262d6003..50c0d085 100644 --- a/adoc/SAP-convergent-mediation-ha-setup-sle15-docinfo.xml +++ b/adoc/SAP-convergent-mediation-ha-setup-sle15-docinfo.xml @@ -61,7 +61,7 @@ optimized in various ways for SAP* applications. This document explains how to deploy an SAP Convergent Mediation ControlZone High Availability Cluster solution. - It is based on SUSE Linux Enterprise Server for SAP Applications 15 SP5. The concept however can also be used with + It is based on SUSE Linux Enterprise Server for SAP Applications 15 SP4. The concept however can also be used with newer service packs of SUSE Linux Enterprise Server for SAP Applications. diff --git a/adoc/SAP-convergent-mediation-ha-setup-sle15.adoc b/adoc/SAP-convergent-mediation-ha-setup-sle15.adoc index 6cc95c4b..eac9828c 100644 --- a/adoc/SAP-convergent-mediation-ha-setup-sle15.adoc +++ b/adoc/SAP-convergent-mediation-ha-setup-sle15.adoc @@ -26,14 +26,6 @@ TODO PRIOx: example The following sections focus on background information and the purpose of the document at hand. -=== Introduction - -{sles4sapReg} is the optimal platform to run {sapReg} applications with high -availability. 
Together with a redundant layout of the technical infrastructure, -single points of failure can be eliminated. - -TODO - === Abstract @@ -782,7 +774,7 @@ Cluster Summary: * Last updated: Tue May 14 17:03:30 2024 * Last change: Mon Apr 22 15:00:58 2024 by root via cibadmin on {myNode2} * 2 nodes configured - * 6 resource instances configured + * 5 resource instances configured Node List: * Online: [ {myNode1} {myNode2} ] @@ -815,7 +807,29 @@ FIRSTIME=$(date +%s) [id="sec.testing"] === Testing the cluster -TODO +As with any HA cluster, testing is crucial. Make sure that all test cases derived +from customer expectations are conducted and passed. Otherwise, the project is likely +to fail in production. + +- Set up a test cluster for testing configuration changes and administrative +procedures before applying them on the production cluster. + +- Carefully define, perform, and document tests for all scenarios that should be +covered, as well as all maintenance procedures. + +- Test ControlZone features without Linux cluster before doing the overall +cluster tests. + +- Test basic Linux cluster features without ControlZone before doing the overall +cluster tests. + +Test cases for the basic Linux cluster as well as test cases for the bare CM +ControlZone components are not covered in this document. Please refer to the +respective product documentation for these cases. +// TODO PRIO2: URLs to product docu for tests + +The following list shows common test cases for the CM ControlZone resources managed +by the HA cluster. ==== Manually restarting ControlZone resources in-place @@ -829,7 +843,7 @@ TODO TODO -==== Testing ControlZone migration by cluster on operating system failure +==== Testing ControlZone migration by cluster on OS or node failure TODO @@ -841,38 +855,137 @@ TODO TODO +//// ==== Additional tests +// TODO PRIO3: add basic tests +//// -TODO basic cluster tests - -TODO == Administration +HA clusters are complex, and so is the CM ControlZone. 
+Deploying and running HA clusters for CM ControlZone requires preparation and +care. Fortunately, most pitfalls and lots of proven procedures are already +known. This chapter outlines common administrative tasks. + === Dos and don'ts +The following basic rules will help to avoid known issues. + +- Carefully test all configuration changes and administrative procedures on the +test cluster before applying them on the production cluster. + +- Before doing anything, always check for the Linux cluster's idle status, +left-over migration constraints, and resource failures as well as the +ControlZone status. + +- Be patient. For detecting the overall ControlZone status, the Linux cluster +needs a certain amount of time, depending on the ControlZone services and the +configured intervals and timeouts. + +- As long as the ControlZone components are managed by the Linux cluster, they +must never be started/stopped/moved from outside. Do not perform manual actions on them. + +See also the manual pages SAPCMControlZone_maintenance_examples(7), +SAPCMControlZone_basic_cluster(7) and ocf_suse_SAPCMControlZone(7). + +==== Showing status of ControlZone resources and HA cluster + +These steps should be performed before doing anything with the cluster, and after +something has been done. + +[subs="specialchars,attributes"] +---- +# su - {mySapAdm} -c "mzsh status" +# crm_mon -1r +# crm configure show | grep cli- +# cibadmin -Q | grep fail-count +# cs_clusterstate -i +---- + +See also manual pages SAPCMControlZone_maintenance_examples(7), crm_mon(8), +cs_clusterstate(8), cs_show_cluster_actions(8). 
+ +==== Starting the ControlZone resources + TODO +[subs="specialchars,attributes"] +---- +# crm_mon -1r +# cs_wait_for_idle -s 6 +# crm resource start grp_cz_{mySid} +# cs_wait_for_idle -s 6; crm_mon -1r +---- -==== Stopping an starting the ControlZone resources +==== Stopping the ControlZone resources TODO +[subs="specialchars,attributes"] +---- +# crm_mon -1r +# cs_wait_for_idle -s 6 +# crm resource stop grp_cz_{mySid} +# cs_wait_for_idle -s 6; crm_mon -1r +---- ==== Migrating the ControlZone resources -TODO +ControlZone application and Linux cluster are checked for clean and idle state. +The ControlZone resources are moved to the other node. The related location rule +is removed after the takeover has taken place. ControlZone application and HA cluster +are checked for clean and idle state. -==== Setting ControlZone resources into maintenance mode +[subs="specialchars,attributes"] +---- +# su - {mySapAdm} -c "mzsh status" +# crm_mon -1r +# crm configure show | grep cli- +# cibadmin -Q | grep fail-count +# cs_clusterstate -i -TODO +# crm resource move grp_cz_{mySid} force +# cs_wait_for_idle -s 9; crm_mon -1r +# crm resource clear grp_cz_{mySid} -==== Ending ControlZone resources maintenance +# cs_wait_for_idle -s 6; crm_mon -1r +# crm configure show | grep cli- +# su - {mySapAdm} -c "mzsh status" +---- -TODO +==== Example for generic maintenance procedure + +ControlZone application and HA cluster are checked for clean and idle state. +The ControlZone resource group is set into maintenance mode. This is needed to +allow manual actions on the resources. After the manual actions are done, the +resource group is put back under cluster control. It is necessary to wait for +each step to complete and to check the result. ControlZone application and HA +cluster are finally checked for clean and idle state. 
+ +[subs="specialchars,attributes"] +---- +# su - {mySapAdm} -c "mzsh status" +# crm_mon -1r +# crm configure show | grep cli- +# cibadmin -Q | grep fail-count +# cs_clusterstate -i +# crm resource maintenance grp_cz_{mySid} + + -==== Cleaning up resources +# crm resource refresh grp_cz_{mySid} +# cs_wait_for_idle -s 6; crm_mon -1r +# crm resource maintenance grp_cz_{mySid} off +# cs_wait_for_idle -s 6; crm_mon -1r +# su - {mySapAdm} -c "mzsh status" +---- + +==== Cleaning up resource failcount TODO +[subs="specialchars,attributes"] +---- +---- diff --git a/adoc/SAPNotes-convergent-mediation.adoc b/adoc/SAPNotes-convergent-mediation.adoc index 5ae396d7..1a842602 100644 --- a/adoc/SAPNotes-convergent-mediation.adoc +++ b/adoc/SAPNotes-convergent-mediation.adoc @@ -14,6 +14,7 @@ - crm_simulate(8) - cs_clusterstate(8) - cs_man2pdf(8) +- cs_show_cluster_actions(8) - cs_show_sbd_devices(8) - cs_wait_for_idle(8) - fstab(5) diff --git a/adoc/Var_SAP-convergent-mediation.adoc b/adoc/Var_SAP-convergent-mediation.adoc index 3e93b1e0..d7681999 100644 --- a/adoc/Var_SAP-convergent-mediation.adoc +++ b/adoc/Var_SAP-convergent-mediation.adoc @@ -1,30 +1,30 @@ -:mySid: EN2 -:mySidLc: en2 +:mySid: C11 +:mySidLc: c11 :mySapAdm: {mySidLc}adm :mySapPwd: -:hanaSidDB: HA1 +:hanaSidDB: H11 -:mzadm: mzadm +:mzadm: c11adm :myDev: /dev/sda -:myDevA: /dev/disk/by-id/SUSE-Example-A +:myDevA: /dev/disk/by-id/Example-A :myDevPartSbd: {myDevA}-part1 :mzsh: mzsh -:mzhome: /opt/mz/{mySapAdm} +:mzhome: /opt/cz/{mySid} :mzshpath: {mzhome}/bin/ -:mzdata: /opt/mz/{mySapAdm} +:mzdata: /opt/cz/{mySid}/interface :myNFSSrv: 192.168.1.1 :myNFSSapmedia: /sapmedia :mySAPinst: /sapmedia/SWPM20_P9/ -:myVipNcz: sap{mySidLc}cz -:myVipNDb: sap{mySidLc}db +:myVipNcz: {mySidLc}cz +:myVipNDb: {mySidLc}db -:myNode1: valuga01 -:myNode2: valuga02 +:myNode1: akka01 +:myNode2: akka02 :myIPNode1: 192.168.1.100 :myIPNode2: 192.168.1.101 @@ -35,7 +35,7 @@ :myHaNetIf: eth0 :sap: SAP -:sapReg: SAP* +:sapReg: SAP(R) :sapBS: 
{SAP} Business Suite :sapBSReg: {SAPReg} Business Suite :sapNW: {SAP} NetWeaver @@ -66,5 +66,5 @@ :ConMed: Convergent Mediation :prodNr: 15 -:prodSP: SP5 +:prodSP: SP4