diff --git a/configs/common/GPUTLBOptions.py b/configs/common/GPUTLBOptions.py
index a17b0c7b5f..79ff1e7f67 100644
--- a/configs/common/GPUTLBOptions.py
+++ b/configs/common/GPUTLBOptions.py
@@ -35,71 +35,72 @@ def tlb_options(parser):
     # TLB Configuration
     #===================================================================
 
-    parser.add_option("--TLB-config", type="string", default="perCU",
-            help="Options are: perCU (default), mono, 2CU, or perLane")
+    parser.add_argument(
+        "--TLB-config", type=str, default="perCU",
+        help="Options are: perCU (default), mono, 2CU, or perLane")
 
     #===================================================================
     #   L1 TLB Options (D-TLB, I-TLB, Dispatcher-TLB)
     #===================================================================
 
-    parser.add_option("--L1TLBentries", type='int', default="32")
-    parser.add_option("--L1TLBassoc", type='int', default="32")
-    parser.add_option("--L1AccessLatency", type='int', default="1",
-                      help="latency in gpu cycles")
-    parser.add_option("--L1MissLatency", type='int', default="750",
-                      help="latency (in gpu cycles) of a page walk, "
-                      "if this is a last level TLB")
-    parser.add_option("--L1MaxOutstandingReqs", type='int', default="64")
-    parser.add_option("--L1AccessDistanceStat", action="store_true")
-    parser.add_option("--tot-L1TLB-size", type="int", default="0")
+    parser.add_argument("--L1TLBentries", type=int, default="32")
+    parser.add_argument("--L1TLBassoc", type=int, default="32")
+    parser.add_argument("--L1AccessLatency", type=int, default="1",
+                        help="latency in gpu cycles")
+    parser.add_argument("--L1MissLatency", type=int, default="750",
+                        help="latency (in gpu cycles) of a page walk, "
+                        "if this is a last level TLB")
+    parser.add_argument("--L1MaxOutstandingReqs", type=int, default="64")
+    parser.add_argument("--L1AccessDistanceStat", action="store_true")
+    parser.add_argument("--tot-L1TLB-size", type=int, default="0")
 
     #===================================================================
     #   L2 TLB Options
     #===================================================================
 
-    parser.add_option("--L2TLBentries", type='int', default="4096")
-    parser.add_option("--L2TLBassoc", type='int', default="32")
-    parser.add_option("--L2AccessLatency", type='int', default="69",
-                      help="latency in gpu cycles")
-    parser.add_option("--L2MissLatency", type='int', default="750",
-                      help="latency (in gpu cycles) of a page walk, "
-                      "if this is a last level TLB")
-    parser.add_option("--L2MaxOutstandingReqs", type='int', default="64")
-    parser.add_option("--L2AccessDistanceStat", action="store_true")
+    parser.add_argument("--L2TLBentries", type=int, default="4096")
+    parser.add_argument("--L2TLBassoc", type=int, default="32")
+    parser.add_argument("--L2AccessLatency", type=int, default="69",
+                        help="latency in gpu cycles")
+    parser.add_argument("--L2MissLatency", type=int, default="750",
+                        help="latency (in gpu cycles) of a page walk, "
+                        "if this is a last level TLB")
+    parser.add_argument("--L2MaxOutstandingReqs", type=int, default="64")
+    parser.add_argument("--L2AccessDistanceStat", action="store_true")
 
     #===================================================================
     #   L3 TLB Options
     #===================================================================
 
-    parser.add_option("--L3TLBentries", type='int', default="8192")
-    parser.add_option("--L3TLBassoc", type='int', default="32")
-    parser.add_option("--L3AccessLatency", type='int', default="150",
-                      help="latency in gpu cycles")
-    parser.add_option("--L3MissLatency", type='int', default="750",
-                      help="latency (in gpu cycles) of a page walk")
-    parser.add_option("--L3MaxOutstandingReqs", type='int', default="64")
-    parser.add_option("--L3AccessDistanceStat", action="store_true")
+    parser.add_argument("--L3TLBentries", type=int, default="8192")
+    parser.add_argument("--L3TLBassoc", type=int, default="32")
+    parser.add_argument("--L3AccessLatency", type=int, default="150",
+                        help="latency in gpu cycles")
+    parser.add_argument("--L3MissLatency", type=int, default="750",
+                        help="latency (in gpu cycles) of a page walk")
+    parser.add_argument("--L3MaxOutstandingReqs", type=int, default="64")
+    parser.add_argument("--L3AccessDistanceStat", action="store_true")
 
     #===================================================================
     #   L1 TLBCoalescer Options
     #===================================================================
 
-    parser.add_option("--L1ProbesPerCycle", type='int', default="2")
-    parser.add_option("--L1CoalescingWindow", type='int', default="1")
-    parser.add_option("--L1DisableCoalescing", action="store_true")
+    parser.add_argument("--L1ProbesPerCycle", type=int, default="2")
+    parser.add_argument("--L1CoalescingWindow", type=int, default="1")
+    parser.add_argument("--L1DisableCoalescing", action="store_true")
 
     #===================================================================
     #   L2 TLBCoalescer Options
     #===================================================================
 
-    parser.add_option("--L2ProbesPerCycle", type='int', default="2")
-    parser.add_option("--L2CoalescingWindow", type='int', default="1")
-    parser.add_option("--L2DisableCoalescing", action="store_true")
+    parser.add_argument("--L2ProbesPerCycle", type=int, default="2")
+    parser.add_argument("--L2CoalescingWindow", type=int, default="1")
+    parser.add_argument("--L2DisableCoalescing", action="store_true")
 
     #===================================================================
     #   L3 TLBCoalescer Options
     #===================================================================
 
-    parser.add_option("--L3ProbesPerCycle", type='int', default="2")
-    parser.add_option("--L3CoalescingWindow", type='int', default="1")
-    parser.add_option("--L3DisableCoalescing", action="store_true")
+    parser.add_argument("--L3ProbesPerCycle", type=int, default="2")
+    parser.add_argument("--L3CoalescingWindow", type=int, default="1")
+    parser.add_argument("--L3DisableCoalescing", action="store_true")
diff --git a/configs/common/Options.py b/configs/common/Options.py
index b833531f4e..75c60a80ef 100644
--- a/configs/common/Options.py
+++ b/configs/common/Options.py
@@ -36,6 +36,8 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import argparse
+
 import m5
 from m5.defines import buildEnv
 from m5.objects import *
@@ -53,440 +55,481 @@
 host PATH or selected with with: VirtIO9PDiod.diod.
 """
 
-def _listCpuTypes(option, opt, value, parser):
-    ObjectList.cpu_list.print()
-    sys.exit(0)
 
-def _listBPTypes(option, opt, value, parser):
-    ObjectList.bp_list.print()
-    sys.exit(0)
+class ListCpu(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        ObjectList.cpu_list.print()
+        sys.exit(0)
+
+
+class ListBp(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        ObjectList.bp_list.print()
+        sys.exit(0)
+
+
+class ListHWP(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        ObjectList.hwp_list.print()
+        sys.exit(0)
+
+
+class ListRP(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        ObjectList.rp_list.print()
+        sys.exit(0)
 
-def _listHWPTypes(option, opt, value, parser):
-    ObjectList.hwp_list.print()
-    sys.exit(0)
 
-def _listRPTypes(option, opt, value, parser):
-    ObjectList.rp_list.print()
-    sys.exit(0)
+class ListIndirectBP(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        ObjectList.indirect_bp_list.print()
+        sys.exit(0)
 
-def _listIndirectBPTypes(option, opt, value, parser):
-    ObjectList.indirect_bp_list.print()
-    sys.exit(0)
 
-def _listMemTypes(option, opt, value, parser):
-    ObjectList.mem_list.print()
-    sys.exit(0)
+class ListMem(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        ObjectList.mem_list.print()
+        sys.exit(0)
 
-def _listPlatformTypes(option, opt, value, parser):
-    ObjectList.platform_list.print()
-    sys.exit(0)
+
+class ListPlatform(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        ObjectList.platform_list.print()
+        sys.exit(0)
 
 # Add the very basic options that work also in the case of the no ISA
 # being used, and consequently no CPUs, but rather various types of
 # testers and traffic generators.
+
+
 def addNoISAOptions(parser):
-    parser.add_option("-n", "--num-cpus", type="int", default=1)
-    parser.add_option("--sys-voltage", action="store", type="string",
-                      default='1.0V',
-                      help = """Top-level voltage for blocks running at system
+    parser.add_argument("-n", "--num-cpus", type=int, default=1)
+    parser.add_argument("--sys-voltage", action="store", type=str,
+                        default='1.0V',
+                        help="""Top-level voltage for blocks running at system
                       power supply""")
-    parser.add_option("--sys-clock", action="store", type="string",
-                      default='1GHz',
-                      help = """Top-level clock for blocks running at system
+    parser.add_argument("--sys-clock", action="store", type=str,
+                        default='1GHz',
+                        help="""Top-level clock for blocks running at system
                       speed""")
 
     # Memory Options
-    parser.add_option("--list-mem-types",
-                      action="callback", callback=_listMemTypes,
-                      help="List available memory types")
-    parser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8",
-                      choices=ObjectList.mem_list.get_names(),
-                      help = "type of memory to use")
-    parser.add_option("--mem-channels", type="int", default=1,
-                      help = "number of memory channels")
-    parser.add_option("--mem-ranks", type="int", default=None,
-                      help = "number of memory ranks per channel")
-    parser.add_option("--mem-size", action="store", type="string",
-                      default="512MB",
-                      help="Specify the physical memory size (single memory)")
-    parser.add_option("--enable-dram-powerdown", action="store_true",
-                       help="Enable low-power states in DRAMInterface")
-    parser.add_option("--mem-channels-intlv", type="int", default=0,
-                      help="Memory channels interleave")
-
-
-    parser.add_option("--memchecker", action="store_true")
+    parser.add_argument("--list-mem-types",
+                        action=ListMem, nargs=0,
+                        help="List available memory types")
+    parser.add_argument("--mem-type", default="DDR3_1600_8x8",
+                        choices=ObjectList.mem_list.get_names(),
+                        help="type of memory to use")
+    parser.add_argument("--mem-channels", type=int, default=1,
+                        help="number of memory channels")
+    parser.add_argument("--mem-ranks", type=int, default=None,
+                        help="number of memory ranks per channel")
+    parser.add_argument(
+        "--mem-size", action="store", type=str, default="512MB",
+        help="Specify the physical memory size (single memory)")
+    parser.add_argument("--enable-dram-powerdown", action="store_true",
+                        help="Enable low-power states in DRAMInterface")
+    parser.add_argument("--mem-channels-intlv", type=int, default=0,
+                        help="Memory channels interleave")
+
+    parser.add_argument("--memchecker", action="store_true")
 
     # Cache Options
-    parser.add_option("--external-memory-system", type="string",
-                      help="use external ports of this port_type for caches")
-    parser.add_option("--tlm-memory", type="string",
-                      help="use external port for SystemC TLM cosimulation")
-    parser.add_option("--caches", action="store_true")
-    parser.add_option("--l2cache", action="store_true")
-    parser.add_option("--num-dirs", type="int", default=1)
-    parser.add_option("--num-l2caches", type="int", default=1)
-    parser.add_option("--num-l3caches", type="int", default=1)
-    parser.add_option("--l1d_size", type="string", default="64kB")
-    parser.add_option("--l1i_size", type="string", default="32kB")
-    parser.add_option("--l2_size", type="string", default="2MB")
-    parser.add_option("--l3_size", type="string", default="16MB")
-    parser.add_option("--l1d_assoc", type="int", default=2)
-    parser.add_option("--l1i_assoc", type="int", default=2)
-    parser.add_option("--l2_assoc", type="int", default=8)
-    parser.add_option("--l3_assoc", type="int", default=16)
-    parser.add_option("--cacheline_size", type="int", default=64)
+    parser.add_argument("--external-memory-system", type=str,
+                        help="use external ports of this port_type for caches")
+    parser.add_argument("--tlm-memory", type=str,
+                        help="use external port for SystemC TLM cosimulation")
+    parser.add_argument("--caches", action="store_true")
+    parser.add_argument("--l2cache", action="store_true")
+    parser.add_argument("--num-dirs", type=int, default=1)
+    parser.add_argument("--num-l2caches", type=int, default=1)
+    parser.add_argument("--num-l3caches", type=int, default=1)
+    parser.add_argument("--l1d_size", type=str, default="64kB")
+    parser.add_argument("--l1i_size", type=str, default="32kB")
+    parser.add_argument("--l2_size", type=str, default="2MB")
+    parser.add_argument("--l3_size", type=str, default="16MB")
+    parser.add_argument("--l1d_assoc", type=int, default=2)
+    parser.add_argument("--l1i_assoc", type=int, default=2)
+    parser.add_argument("--l2_assoc", type=int, default=8)
+    parser.add_argument("--l3_assoc", type=int, default=16)
+    parser.add_argument("--cacheline_size", type=int, default=64)
 
     # Enable Ruby
-    parser.add_option("--ruby", action="store_true")
+    parser.add_argument("--ruby", action="store_true")
 
     # Run duration options
-    parser.add_option("-m", "--abs-max-tick", type="int", default=m5.MaxTick,
-                      metavar="TICKS", help="Run to absolute simulated tick "
-                      "specified including ticks from a restored checkpoint")
-    parser.add_option("--rel-max-tick", type="int", default=None,
-                      metavar="TICKS", help="Simulate for specified number of"
-                      " ticks relative to the simulation start tick (e.g. if "
-                      "restoring a checkpoint)")
-    parser.add_option("--maxtime", type="float", default=None,
-                      help="Run to the specified absolute simulated time in "
-                      "seconds")
-    parser.add_option("-P", "--param", action="append", default=[],
+    parser.add_argument("-m", "--abs-max-tick", type=int, default=m5.MaxTick,
+                        metavar="TICKS", help="Run to absolute simulated tick "
+                        "specified including ticks from a restored checkpoint")
+    parser.add_argument(
+        "--rel-max-tick", type=int, default=None, metavar="TICKS",
+        help="Simulate for specified number of"
+        " ticks relative to the simulation start tick (e.g. if "
+        "restoring a checkpoint)")
+    parser.add_argument("--maxtime", type=float, default=None,
+                        help="Run to the specified absolute simulated time in "
+                        "seconds")
+    parser.add_argument(
+        "-P", "--param", action="append", default=[],
         help="Set a SimObject parameter relative to the root node. "
-             "An extended Python multi range slicing syntax can be used "
-             "for arrays. For example: "
-             "'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
-             "sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
-             "Direct parameters of the root object are not accessible, "
-             "only parameters of its children.")
+        "An extended Python multi range slicing syntax can be used "
+        "for arrays. For example: "
+        "'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
+        "sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
+        "Direct parameters of the root object are not accessible, "
+        "only parameters of its children.")
 
 # Add common options that assume a non-NULL ISA.
+
+
 def addCommonOptions(parser):
     # start by adding the base options that do not assume an ISA
     addNoISAOptions(parser)
 
     # system options
-    parser.add_option("--list-cpu-types",
-                      action="callback", callback=_listCpuTypes,
-                      help="List available CPU types")
-    parser.add_option("--cpu-type", type="choice", default="AtomicSimpleCPU",
-                      choices=ObjectList.cpu_list.get_names(),
-                      help = "type of cpu to run with")
-    parser.add_option("--list-bp-types",
-                      action="callback", callback=_listBPTypes,
-                      help="List available branch predictor types")
-    parser.add_option("--list-indirect-bp-types",
-                      action="callback", callback=_listIndirectBPTypes,
-                      help="List available indirect branch predictor types")
-    parser.add_option("--bp-type", type="choice", default=None,
-                      choices=ObjectList.bp_list.get_names(),
-                      help = """
-                      type of branch predictor to run with
-                      (if not set, use the default branch predictor of
-                      the selected CPU)""")
-    parser.add_option("--indirect-bp-type", type="choice", default=None,
-                      choices=ObjectList.indirect_bp_list.get_names(),
-                      help = "type of indirect branch predictor to run with")
-
-    parser.add_option("--list-rp-types",
-                      action="callback", callback=_listRPTypes,
-                      help="List available replacement policy types")
-
-    parser.add_option("--list-hwp-types",
-                      action="callback", callback=_listHWPTypes,
-                      help="List available hardware prefetcher types")
-    parser.add_option("--l1i-hwp-type", type="choice", default=None,
-                      choices=ObjectList.hwp_list.get_names(),
-                      help = """
-                      type of hardware prefetcher to use with the L1
-                      instruction cache.
-                      (if not set, use the default prefetcher of
-                      the selected cache)""")
-    parser.add_option("--l1d-hwp-type", type="choice", default=None,
-                      choices=ObjectList.hwp_list.get_names(),
-                      help = """
-                      type of hardware prefetcher to use with the L1
-                      data cache.
-                      (if not set, use the default prefetcher of
-                      the selected cache)""")
-    parser.add_option("--l2-hwp-type", type="choice", default=None,
-                      choices=ObjectList.hwp_list.get_names(),
-                      help = """
-                      type of hardware prefetcher to use with the L2 cache.
-                      (if not set, use the default prefetcher of
-                      the selected cache)""")
-    parser.add_option("--checker", action="store_true");
-    parser.add_option("--cpu-clock", action="store", type="string",
-                      default='2GHz',
-                      help="Clock for blocks running at CPU speed")
-    parser.add_option("--smt", action="store_true", default=False,
-                      help = """
+    parser.add_argument("--list-cpu-types",
+                        action=ListCpu, nargs=0,
+                        help="List available CPU types")
+    parser.add_argument("--cpu-type", default="AtomicSimpleCPU",
+                        choices=ObjectList.cpu_list.get_names(),
+                        help="type of cpu to run with")
+    parser.add_argument("--list-bp-types",
+                        action=ListBp, nargs=0,
+                        help="List available branch predictor types")
+    parser.add_argument("--list-indirect-bp-types",
+                        action=ListIndirectBP, nargs=0,
+                        help="List available indirect branch predictor types")
+    parser.add_argument("--bp-type", default=None,
+                        choices=ObjectList.bp_list.get_names(),
+                        help="""
+                        type of branch predictor to run with
+                        (if not set, use the default branch predictor of
+                        the selected CPU)""")
+    parser.add_argument("--indirect-bp-type", default=None,
+                        choices=ObjectList.indirect_bp_list.get_names(),
+                        help="type of indirect branch predictor to run with")
+
+    parser.add_argument("--list-rp-types",
+                        action=ListRP, nargs=0,
+                        help="List available replacement policy types")
+
+    parser.add_argument("--list-hwp-types",
+                        action=ListHWP, nargs=0,
+                        help="List available hardware prefetcher types")
+    parser.add_argument("--l1i-hwp-type", default=None,
+                        choices=ObjectList.hwp_list.get_names(),
+                        help="""
+                        type of hardware prefetcher to use with the L1
+                        instruction cache.
+                        (if not set, use the default prefetcher of
+                        the selected cache)""")
+    parser.add_argument("--l1d-hwp-type", default=None,
+                        choices=ObjectList.hwp_list.get_names(),
+                        help="""
+                        type of hardware prefetcher to use with the L1
+                        data cache.
+                        (if not set, use the default prefetcher of
+                        the selected cache)""")
+    parser.add_argument("--l2-hwp-type", default=None,
+                        choices=ObjectList.hwp_list.get_names(),
+                        help="""
+                        type of hardware prefetcher to use with the L2 cache.
+                        (if not set, use the default prefetcher of
+                        the selected cache)""")
+    parser.add_argument("--checker", action="store_true")
+    parser.add_argument("--cpu-clock", action="store", type=str,
+                        default='2GHz',
+                        help="Clock for blocks running at CPU speed")
+    parser.add_argument("--smt", action="store_true", default=False,
+                        help="""
                       Only used if multiple programs are specified. If true,
                       then the number of threads per cpu is same as the
                       number of programs.""")
-    parser.add_option("--elastic-trace-en", action="store_true",
-                      help="""Enable capture of data dependency and instruction
+    parser.add_argument(
+        "--elastic-trace-en", action="store_true",
+        help="""Enable capture of data dependency and instruction
                       fetch traces using elastic trace probe.""")
     # Trace file paths input to trace probe in a capture simulation and input
     # to Trace CPU in a replay simulation
-    parser.add_option("--inst-trace-file", action="store", type="string",
-                      help="""Instruction fetch trace file input to
+    parser.add_argument("--inst-trace-file", action="store", type=str,
+                        help="""Instruction fetch trace file input to
                       Elastic Trace probe in a capture simulation and
                       Trace CPU in a replay simulation""", default="")
-    parser.add_option("--data-trace-file", action="store", type="string",
-                      help="""Data dependency trace file input to
+    parser.add_argument("--data-trace-file", action="store", type=str,
+                        help="""Data dependency trace file input to
                       Elastic Trace probe in a capture simulation and
                       Trace CPU in a replay simulation""", default="")
 
-    parser.add_option("-l", "--lpae", action="store_true")
-    parser.add_option("-V", "--virtualisation", action="store_true")
+    parser.add_argument("-l", "--lpae", action="store_true")
+    parser.add_argument("-V", "--virtualisation", action="store_true")
 
     # dist-gem5 options
-    parser.add_option("--dist", action="store_true",
-                      help="Parallel distributed gem5 simulation.")
-    parser.add_option("--dist-sync-on-pseudo-op", action="store_true",
-                      help="Use a pseudo-op to start dist-gem5 synchronization.")
-    parser.add_option("--is-switch", action="store_true",
-                      help="Select the network switch simulator process for a"\
-                      "distributed gem5 run")
-    parser.add_option("--dist-rank", default=0, action="store", type="int",
-                      help="Rank of this system within the dist gem5 run.")
-    parser.add_option("--dist-size", default=0, action="store", type="int",
-                      help="Number of gem5 processes within the dist gem5 run.")
-    parser.add_option("--dist-server-name",
-                      default="127.0.0.1",
-                      action="store", type="string",
-                      help="Name of the message server host\nDEFAULT: localhost")
-    parser.add_option("--dist-server-port",
-                      default=2200,
-                      action="store", type="int",
-                      help="Message server listen port\nDEFAULT: 2200")
-    parser.add_option("--dist-sync-repeat",
-                      default="0us",
-                      action="store", type="string",
-                      help="Repeat interval for synchronisation barriers among dist-gem5 processes\nDEFAULT: --ethernet-linkdelay")
-    parser.add_option("--dist-sync-start",
-                      default="5200000000000t",
-                      action="store", type="string",
-                      help="Time to schedule the first dist synchronisation barrier\nDEFAULT:5200000000000t")
-    parser.add_option("--ethernet-linkspeed", default="10Gbps",
-                        action="store", type="string",
+    parser.add_argument("--dist", action="store_true",
+                        help="Parallel distributed gem5 simulation.")
+    parser.add_argument(
+        "--dist-sync-on-pseudo-op", action="store_true",
+        help="Use a pseudo-op to start dist-gem5 synchronization.")
+    parser.add_argument(
+        "--is-switch", action="store_true",
+        help="Select the network switch simulator process for a "
+        "distributed gem5 run")
+    parser.add_argument("--dist-rank", default=0, action="store", type=int,
+                        help="Rank of this system within the dist gem5 run.")
+    parser.add_argument(
+        "--dist-size", default=0, action="store", type=int,
+        help="Number of gem5 processes within the dist gem5 run.")
+    parser.add_argument(
+        "--dist-server-name", default="127.0.0.1", action="store", type=str,
+        help="Name of the message server host\nDEFAULT: localhost")
+    parser.add_argument("--dist-server-port",
+                        default=2200,
+                        action="store", type=int,
+                        help="Message server listen port\nDEFAULT: 2200")
+    parser.add_argument(
+        "--dist-sync-repeat", default="0us", action="store", type=str,
+        help="Repeat interval for synchronisation barriers among "
+        "dist-gem5 processes\nDEFAULT: --ethernet-linkdelay")
+    parser.add_argument(
+        "--dist-sync-start", default="5200000000000t", action="store",
+        type=str,
+        help="Time to schedule the first dist synchronisation barrier\n"
+        "DEFAULT: 5200000000000t")
+    parser.add_argument("--ethernet-linkspeed", default="10Gbps",
+                        action="store", type=str,
                         help="Link speed in bps\nDEFAULT: 10Gbps")
-    parser.add_option("--ethernet-linkdelay", default="10us",
-                      action="store", type="string",
-                      help="Link delay in seconds\nDEFAULT: 10us")
+    parser.add_argument("--ethernet-linkdelay", default="10us",
+                        action="store", type=str,
+                        help="Link delay in seconds\nDEFAULT: 10us")
 
     # Run duration options
-    parser.add_option("-I", "--maxinsts", action="store", type="int",
-                      default=None, help="""Total number of instructions to
+    parser.add_argument("-I", "--maxinsts", action="store", type=int,
+                        default=None, help="""Total number of instructions to
                                             simulate (default: run forever)""")
-    parser.add_option("--work-item-id", action="store", type="int",
-                      help="the specific work id for exit & checkpointing")
-    parser.add_option("--num-work-ids", action="store", type="int",
-                      help="Number of distinct work item types")
-    parser.add_option("--work-begin-cpu-id-exit", action="store", type="int",
-                      help="exit when work starts on the specified cpu")
-    parser.add_option("--work-end-exit-count", action="store", type="int",
-                      help="exit at specified work end count")
-    parser.add_option("--work-begin-exit-count", action="store", type="int",
-                      help="exit at specified work begin count")
-    parser.add_option("--init-param", action="store", type="int", default=0,
-                      help="""Parameter available in simulation with m5
+    parser.add_argument("--work-item-id", action="store", type=int,
+                        help="the specific work id for exit & checkpointing")
+    parser.add_argument("--num-work-ids", action="store", type=int,
+                        help="Number of distinct work item types")
+    parser.add_argument("--work-begin-cpu-id-exit", action="store", type=int,
+                        help="exit when work starts on the specified cpu")
+    parser.add_argument("--work-end-exit-count", action="store", type=int,
+                        help="exit at specified work end count")
+    parser.add_argument("--work-begin-exit-count", action="store", type=int,
+                        help="exit at specified work begin count")
+    parser.add_argument("--init-param", action="store", type=int, default=0,
+                        help="""Parameter available in simulation with m5
                               initparam""")
-    parser.add_option("--initialize-only", action="store_true", default=False,
-                      help="""Exit after initialization. Do not simulate time.
+    parser.add_argument(
+        "--initialize-only", action="store_true", default=False,
+        help="""Exit after initialization. Do not simulate time.
                               Useful when gem5 is run as a library.""")
 
     # Simpoint options
-    parser.add_option("--simpoint-profile", action="store_true",
-                      help="Enable basic block profiling for SimPoints")
-    parser.add_option("--simpoint-interval", type="int", default=10000000,
-                      help="SimPoint interval in num of instructions")
-    parser.add_option("--take-simpoint-checkpoints", action="store", type="string",
+    parser.add_argument("--simpoint-profile", action="store_true",
+                        help="Enable basic block profiling for SimPoints")
+    parser.add_argument("--simpoint-interval", type=int, default=10000000,
+                        help="SimPoint interval in num of instructions")
+    parser.add_argument(
+        "--take-simpoint-checkpoints", action="store", type=str,
         help="<simpoint file,weight file,interval-length,warmup-length>")
-    parser.add_option("--restore-simpoint-checkpoint", action="store_true",
-        default=False,
-        help="restore from a simpoint checkpoint taken with " +
-             "--take-simpoint-checkpoints")
+    parser.add_argument("--restore-simpoint-checkpoint", action="store_true",
+                        default=False,
+                        help="restore from a simpoint checkpoint taken with " +
+                        "--take-simpoint-checkpoints")
 
     # Checkpointing options
-    ###Note that performing checkpointing via python script files will override
-    ###checkpoint instructions built into binaries.
-    parser.add_option("--take-checkpoints", action="store", type="string",
+    # Note that performing checkpointing via python script files will override
+    # checkpoint instructions built into binaries.
+    parser.add_argument(
+        "--take-checkpoints", action="store", type=str,
         help="<M,N> take checkpoints at tick M and every N ticks thereafter")
-    parser.add_option("--max-checkpoints", action="store", type="int",
+    parser.add_argument(
+        "--max-checkpoints", action="store", type=int,
         help="the maximum number of checkpoints to drop", default=5)
-    parser.add_option("--checkpoint-dir", action="store", type="string",
+    parser.add_argument(
+        "--checkpoint-dir", action="store", type=str,
         help="Place all checkpoints in this absolute directory")
-    parser.add_option("-r", "--checkpoint-restore", action="store", type="int",
-        help="restore from checkpoint <N>")
-    parser.add_option("--checkpoint-at-end", action="store_true",
-                      help="take a checkpoint at end of run")
-    parser.add_option("--work-begin-checkpoint-count", action="store", type="int",
-                      help="checkpoint at specified work begin count")
-    parser.add_option("--work-end-checkpoint-count", action="store", type="int",
-                      help="checkpoint at specified work end count")
-    parser.add_option("--work-cpus-checkpoint-count", action="store", type="int",
-                      help="checkpoint and exit when active cpu count is reached")
-    parser.add_option("--restore-with-cpu", action="store", type="choice",
-                      default="AtomicSimpleCPU",
-                      choices=ObjectList.cpu_list.get_names(),
-                      help = "cpu type for restoring from a checkpoint")
-
+    parser.add_argument("-r", "--checkpoint-restore", action="store", type=int,
+                        help="restore from checkpoint <N>")
+    parser.add_argument("--checkpoint-at-end", action="store_true",
+                        help="take a checkpoint at end of run")
+    parser.add_argument(
+        "--work-begin-checkpoint-count", action="store", type=int,
+        help="checkpoint at specified work begin count")
+    parser.add_argument(
+        "--work-end-checkpoint-count", action="store", type=int,
+        help="checkpoint at specified work end count")
+    parser.add_argument(
+        "--work-cpus-checkpoint-count", action="store", type=int,
+        help="checkpoint and exit when active cpu count is reached")
+    parser.add_argument("--restore-with-cpu", action="store",
+                        default="AtomicSimpleCPU",
+                        choices=ObjectList.cpu_list.get_names(),
+                        help="cpu type for restoring from a checkpoint")
 
     # CPU Switching - default switch model goes from a checkpoint
     # to a timing simple CPU with caches to warm up, then to detailed CPU for
     # data measurement
-    parser.add_option("--repeat-switch", action="store", type="int",
-        default=None,
+    parser.add_argument(
+        "--repeat-switch", action="store", type=int, default=None,
         help="switch back and forth between CPUs with period <N>")
-    parser.add_option("-s", "--standard-switch", action="store", type="int",
-        default=None,
+    parser.add_argument(
+        "-s", "--standard-switch", action="store", type=int, default=None,
         help="switch from timing to Detailed CPU after warmup period of <N>")
-    parser.add_option("-p", "--prog-interval", type="str",
-        help="CPU Progress Interval")
+    parser.add_argument("-p", "--prog-interval", type=str,
+                        help="CPU Progress Interval")
 
     # Fastforwarding and simpoint related materials
-    parser.add_option("-W", "--warmup-insts", action="store", type="int",
-        default=None,
+    parser.add_argument(
+        "-W", "--warmup-insts", action="store", type=int, default=None,
         help="Warmup period in total instructions (requires --standard-switch)")
-    parser.add_option("--bench", action="store", type="string", default=None,
+    parser.add_argument(
+        "--bench", action="store", type=str, default=None,
         help="base names for --take-checkpoint and --checkpoint-restore")
-    parser.add_option("-F", "--fast-forward", action="store", type="string",
-        default=None,
+    parser.add_argument(
+        "-F", "--fast-forward", action="store", type=str, default=None,
         help="Number of instructions to fast forward before switching")
-    parser.add_option("-S", "--simpoint", action="store_true", default=False,
+    parser.add_argument(
+        "-S", "--simpoint", action="store_true", default=False,
         help="""Use workload simpoints as an instruction offset for
                 --checkpoint-restore or --take-checkpoint.""")
-    parser.add_option("--at-instruction", action="store_true", default=False,
+    parser.add_argument(
+        "--at-instruction", action="store_true", default=False,
         help="""Treat value of --checkpoint-restore or --take-checkpoint as a
                 number of instructions.""")
-    parser.add_option("--spec-input", default="ref", type="choice",
-                      choices=["ref", "test", "train", "smred", "mdred",
-                               "lgred"],
-                      help="Input set size for SPEC CPU2000 benchmarks.")
-    parser.add_option("--arm-iset", default="arm", type="choice",
-                      choices=["arm", "thumb", "aarch64"],
-                      help="ARM instruction set.")
-    parser.add_option("--stats-root", action="append", default=[], help=
-        "If given, dump only stats of objects under the given SimObject. "
+    parser.add_argument("--spec-input", default="ref",
+                        choices=["ref", "test", "train", "smred", "mdred",
+                                 "lgred"],
+                        help="Input set size for SPEC CPU2000 benchmarks.")
+    parser.add_argument("--arm-iset", default="arm",
+                        choices=["arm", "thumb", "aarch64"],
+                        help="ARM instruction set.")
+    parser.add_argument(
+        "--stats-root", action="append", default=[],
+        help="If given, dump only stats of objects under the given SimObject. "
         "SimObjects are identified with Python notation as in: "
         "system.cpu[0].mmu. All elements of an array can be selected at "
         "once with: system.cpu[:].mmu. If given multiple times, dump stats "
         "that are present under any of the roots. If not given, dump all "
-        "stats. "
-    )
+        "stats. ")
 
 
 def addSEOptions(parser):
     # Benchmark options
-    parser.add_option("-c", "--cmd", default="",
-                      help="The binary to run in syscall emulation mode.")
-    parser.add_option("-o", "--options", default="",
-                      help="""The options to pass to the binary, use " "
+    parser.add_argument("-c", "--cmd", default="",
+                        help="The binary to run in syscall emulation mode.")
+    parser.add_argument("-o", "--options", default="",
+                        help="""The options to pass to the binary, use " "
                               around the entire string""")
-    parser.add_option("-e", "--env", default="",
-                      help="Initialize workload environment from text file.")
-    parser.add_option("-i", "--input", default="",
-                      help="Read stdin from a file.")
-    parser.add_option("--output", default="",
-                      help="Redirect stdout to a file.")
-    parser.add_option("--errout", default="",
-                      help="Redirect stderr to a file.")
-    parser.add_option("--chroot", action="store", type="string", default=None,
-                      help="The chroot option allows a user to alter the "    \
-                           "search path for processes running in SE mode. "   \
-                           "Normally, the search path would begin at the "    \
-                           "root of the filesystem (i.e. /). With chroot, "   \
-                           "a user can force the process to begin looking at" \
-                           "some other location (i.e. /home/user/rand_dir)."  \
-                           "The intended use is to trick sophisticated "      \
-                           "software which queries the __HOST__ filesystem "  \
-                           "for information or functionality. Instead of "    \
-                           "finding files on the __HOST__ filesystem, the "   \
-                           "process will find the user's replacment files.")
-    parser.add_option("--interp-dir", action="store", type="string",
-                      default=None,
-                      help="The interp-dir option is used for "
-                           "setting the interpreter's path. This will "
-                           "allow to load the guest dynamic linker/loader "
-                           "itself from the elf binary. The option points to "
-                           "the parent folder of the guest /lib in the "
-                           "host fs")
-
-    parser.add_option("--redirects", action="append", type="string",
-                      default=[],
-                      help="A collection of one or more redirect paths "
-                           "to be used in syscall emulation."
-                           "Usage: gem5.opt [...] --redirects /dir1=/path/"
-                           "to/host/dir1 --redirects /dir2=/path/to/host/dir2")
-    parser.add_option("--wait-gdb", default=False,
-                      help="Wait for remote GDB to connect.")
-
+    parser.add_argument("-e", "--env", default="",
+                        help="Initialize workload environment from text file.")
+    parser.add_argument("-i", "--input", default="",
+                        help="Read stdin from a file.")
+    parser.add_argument("--output", default="",
+                        help="Redirect stdout to a file.")
+    parser.add_argument("--errout", default="",
+                        help="Redirect stderr to a file.")
+    parser.add_argument("--chroot", action="store", type=str, default=None,
+                        help="The chroot option allows a user to alter the "
+                        "search path for processes running in SE mode. "
+                        "Normally, the search path would begin at the "
+                        "root of the filesystem (i.e. /). With chroot, "
+                        "a user can force the process to begin looking at "
+                        "some other location (i.e. /home/user/rand_dir). "
+                        "The intended use is to trick sophisticated "
+                        "software which queries the __HOST__ filesystem "
+                        "for information or functionality. Instead of "
+                        "finding files on the __HOST__ filesystem, the "
+                        "process will find the user's replacement files.")
+    parser.add_argument("--interp-dir", action="store", type=str,
+                        default=None,
+                        help="The interp-dir option is used for "
+                        "setting the interpreter's path. This will "
+                        "allow to load the guest dynamic linker/loader "
+                        "itself from the elf binary. The option points to "
+                        "the parent folder of the guest /lib in the "
+                        "host fs")
+
+    parser.add_argument("--redirects", action="append", type=str,
+                        default=[],
+                        help="A collection of one or more redirect paths "
+                        "to be used in syscall emulation. "
+                        "Usage: gem5.opt [...] --redirects /dir1=/path/"
+                        "to/host/dir1 --redirects /dir2=/path/to/host/dir2")
+    parser.add_argument("--wait-gdb", default=False,
+                        help="Wait for remote GDB to connect.")
 
 
 def addFSOptions(parser):
     from common.FSConfig import os_types
 
     # Simulation options
-    parser.add_option("--timesync", action="store_true",
-            help="Prevent simulated time from getting ahead of real time")
+    parser.add_argument(
+        "--timesync", action="store_true",
+        help="Prevent simulated time from getting ahead of real time")
 
     # System options
-    parser.add_option("--kernel", action="store", type="string")
-    parser.add_option("--os-type", action="store", type="choice",
-                      choices=os_types[str(buildEnv['TARGET_ISA'])],
-                      default="linux",
-                      help="Specifies type of OS to boot")
-    parser.add_option("--script", action="store", type="string")
-    parser.add_option("--frame-capture", action="store_true",
-            help="Stores changed frame buffers from the VNC server to compressed "\
-            "files in the gem5 output directory")
+    parser.add_argument("--kernel", action="store", type=str)
+    parser.add_argument("--os-type", action="store",
+                        choices=os_types[str(buildEnv['TARGET_ISA'])],
+                        default="linux",
+                        help="Specifies type of OS to boot")
+    parser.add_argument("--script", action="store", type=str)
+    parser.add_argument(
+        "--frame-capture", action="store_true",
+        help="Stores changed frame buffers from the VNC server to compressed "
+        "files in the gem5 output directory")
 
     if buildEnv['TARGET_ISA'] == "arm":
-        parser.add_option("--bare-metal", action="store_true",
-                   help="Provide the raw system without the linux specific bits")
-        parser.add_option("--list-machine-types",
-                          action="callback", callback=_listPlatformTypes,
-                      help="List available platform types")
-        parser.add_option("--machine-type", action="store", type="choice",
-                choices=ObjectList.platform_list.get_names(),
-                default="VExpress_GEM5_V1")
-        parser.add_option("--dtb-filename", action="store", type="string",
-              help="Specifies device tree blob file to use with device-tree-"\
-              "enabled kernels")
-        parser.add_option("--enable-security-extensions", action="store_true",
-              help="Turn on the ARM Security Extensions")
-        parser.add_option("--enable-context-switch-stats-dump", \
-                action="store_true", help="Enable stats dump at context "\
-                "switches and dump tasks file (required for Streamline)")
-        parser.add_option("--vio-9p", action="store_true", help=vio_9p_help)
-        parser.add_option("--bootloader", action='append',
-                help="executable file that runs before the --kernel")
+        parser.add_argument(
+            "--bare-metal", action="store_true",
+            help="Provide the raw system without the linux specific bits")
+        parser.add_argument("--list-machine-types",
+                            action=ListPlatform, nargs=0,
+                            help="List available platform types")
+        parser.add_argument("--machine-type", action="store",
+                            choices=ObjectList.platform_list.get_names(),
+                            default="VExpress_GEM5_V1")
+        parser.add_argument(
+            "--dtb-filename", action="store", type=str,
+            help="Specifies device tree blob file to use with device-tree-"
+            "enabled kernels")
+        parser.add_argument(
+            "--enable-security-extensions", action="store_true",
+            help="Turn on the ARM Security Extensions")
+        parser.add_argument(
+            "--enable-context-switch-stats-dump", action="store_true",
+            help="Enable stats dump at context "
+            "switches and dump tasks file (required for Streamline)")
+        parser.add_argument("--vio-9p", action="store_true", help=vio_9p_help)
+        parser.add_argument(
+            "--bootloader", action='append',
+            help="executable file that runs before the --kernel")
 
     # Benchmark options
-    parser.add_option("--dual", action="store_true",
-                      help="Simulate two systems attached with an ethernet link")
-    parser.add_option("-b", "--benchmark", action="store", type="string",
-                      dest="benchmark",
-                      help="Specify the benchmark to run. Available benchmarks: %s"\
-                      % DefinedBenchmarks)
+    parser.add_argument(
+        "--dual", action="store_true",
+        help="Simulate two systems attached with an ethernet link")
+    parser.add_argument(
+        "-b", "--benchmark", action="store", type=str, dest="benchmark",
+        help="Specify the benchmark to run. Available benchmarks: %s" %
+        DefinedBenchmarks)
 
     # Metafile options
-    parser.add_option("--etherdump", action="store", type="string", dest="etherdump",
-                      help="Specify the filename to dump a pcap capture of the" \
-                      "ethernet traffic")
+    parser.add_argument(
+        "--etherdump", action="store", type=str, dest="etherdump",
+        help="Specify the filename to dump a pcap capture of the "
+        "ethernet traffic")
 
     # Disk Image Options
-    parser.add_option("--disk-image", action="append", type="string",
-            default=[], help="Path to the disk images to use.")
-    parser.add_option("--root-device", action="store", type="string",
-            default=None, help="OS device name for root partition")
+    parser.add_argument("--disk-image", action="append", type=str,
+                        default=[], help="Path to the disk images to use.")
+    parser.add_argument("--root-device", action="store", type=str,
+                        default=None, help="OS device name for root partition")
 
     # Command line options
-    parser.add_option("--command-line", action="store", type="string",
-                      default=None,
-                      help="Template for the kernel command line.")
-    parser.add_option("--command-line-file", action="store",
-                      default=None, type="string",
-                      help="File with a template for the kernel command line")
+    parser.add_argument("--command-line", action="store", type=str,
+                        default=None,
+                        help="Template for the kernel command line.")
+    parser.add_argument(
+        "--command-line-file", action="store", default=None, type=str,
+        help="File with a template for the kernel command line")
diff --git a/configs/common/SimpleOpts.py b/configs/common/SimpleOpts.py
index ce14f0e20f..fabc8e048b 100644
--- a/configs/common/SimpleOpts.py
+++ b/configs/common/SimpleOpts.py
@@ -27,7 +27,7 @@
 
 """ Options wrapper for simple gem5 configuration scripts
 
-This module wraps the optparse class so that we can register options
+This module wraps argparse's ArgumentParser so that we can register options
 from each class instead of only from the configuration script.
 
 """
@@ -38,24 +38,20 @@
 # For fatal
 import m5
 
-# import the options parser
-from optparse import OptionParser
+# import the argument parser
+from argparse import ArgumentParser
 
-# add the options we want to be able to control from the command line
-parser = OptionParser()
+# add the args we want to be able to control from the command line
+parser = ArgumentParser()
 
 def add_option(*args, **kwargs):
     """Call "add_option" to the global options parser
     """
 
-    if (parser.has_option(args[0]) or
-            (len(args) > 1 and parser.has_option(args[1])) ):
-        m5.fatal("Duplicate option: %s" % str(args))
-
     if called_parse_args:
         m5.fatal("Can't add an option after calling SimpleOpts.parse_args")
 
-    parser.add_option(*args, **kwargs)
+    parser.add_argument(*args, **kwargs)
 
 def parse_args():
     global called_parse_args
@@ -63,9 +59,6 @@ def parse_args():
 
     return parser.parse_args()
 
-def set_usage(*args, **kwargs):
-    parser.set_usage(*args, **kwargs)
-
 def print_help(*args, **kwargs):
     parser.print_help(*args, **kwargs)
 
diff --git a/configs/dist/sw.py b/configs/dist/sw.py
index 94e26b5501..41edf9e21b 100644
--- a/configs/dist/sw.py
+++ b/configs/dist/sw.py
@@ -27,7 +27,7 @@
 # This is an example of an n port network switch to work in dist-gem5.
 # Users can extend this to have different different topologies
 
-import optparse
+import argparse
 import sys
 
 import m5
@@ -40,22 +40,22 @@
 from common import Simulation
 from common import Options
 
-def build_switch(options):
+def build_switch(args):
     # instantiate an EtherSwitch
     switch = EtherSwitch()
     # instantiate distEtherLinks to connect switch ports
     # to other gem5 instances
-    switch.portlink = [DistEtherLink(speed = options.ethernet_linkspeed,
-                                      delay = options.ethernet_linkdelay,
-                                      dist_rank = options.dist_rank,
-                                      dist_size = options.dist_size,
-                                      server_name = options.dist_server_name,
-                                      server_port = options.dist_server_port,
-                                      sync_start = options.dist_sync_start,
-                                      sync_repeat = options.dist_sync_repeat,
+    switch.portlink = [DistEtherLink(speed = args.ethernet_linkspeed,
+                                      delay = args.ethernet_linkdelay,
+                                      dist_rank = args.dist_rank,
+                                      dist_size = args.dist_size,
+                                      server_name = args.dist_server_name,
+                                      server_port = args.dist_server_port,
+                                      sync_start = args.dist_sync_start,
+                                      sync_repeat = args.dist_sync_repeat,
                                       is_switch = True,
-                                      num_nodes = options.dist_size)
-                       for i in range(options.dist_size)]
+                                      num_nodes = args.dist_size)
+                       for i in range(args.dist_size)]
 
     for (i, link) in enumerate(switch.portlink):
         link.int0 = switch.interface[i]
@@ -64,14 +64,14 @@ def build_switch(options):
 
 def main():
     # Add options
-    parser = optparse.OptionParser()
+    parser = argparse.ArgumentParser()
     Options.addCommonOptions(parser)
     Options.addFSOptions(parser)
-    (options, args) = parser.parse_args()
+    args = parser.parse_args()
 
-    system = build_switch(options)
+    system = build_switch(args)
     root = Root(full_system = True, system = system)
-    Simulation.run(options, root, None, None)
+    Simulation.run(args, root, None, None)
 
 if __name__ == "__m5_main__":
     main()
diff --git a/configs/dram/lat_mem_rd.py b/configs/dram/lat_mem_rd.py
index 191d4b51c7..6ea2c59764 100644
--- a/configs/dram/lat_mem_rd.py
+++ b/configs/dram/lat_mem_rd.py
@@ -34,7 +34,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import gzip
-import optparse
+import argparse
 import os
 
 import m5
@@ -77,22 +77,18 @@
         print("Failed to import packet proto definitions")
         exit(-1)
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 
-parser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8",
-                  choices=ObjectList.mem_list.get_names(),
-                  help = "type of memory to use")
-parser.add_option("--mem-size", action="store", type="string",
-                  default="16MB",
-                  help="Specify the memory size")
-parser.add_option("--reuse-trace", action="store_true",
-                  help="Prevent generation of traces and reuse existing")
+parser.add_argument("--mem-type", default="DDR3_1600_8x8",
+                    choices=ObjectList.mem_list.get_names(),
+                    help = "type of memory to use")
+parser.add_argument("--mem-size", action="store", type=str,
+                    default="16MB",
+                    help="Specify the memory size")
+parser.add_argument("--reuse-trace", action="store_true",
+                    help="Prevent generation of traces and reuse existing")
 
-(options, args) = parser.parse_args()
-
-if args:
-    print("Error: script doesn't take any positional arguments")
-    sys.exit(1)
+args = parser.parse_args()
 
 # start by creating the system itself, using a multi-layer 2.0 GHz
 # crossbar, delivering 64 bytes / 3 cycles (one header cycle) which
@@ -102,20 +98,20 @@
                                    voltage_domain =
                                    VoltageDomain(voltage = '1V'))
 
-mem_range = AddrRange(options.mem_size)
+mem_range = AddrRange(args.mem_size)
 system.mem_ranges = [mem_range]
 
 # do not worry about reserving space for the backing store
 system.mmap_using_noreserve = True
 
-# currently not exposed as command-line options, set here for now
-options.mem_channels = 1
-options.mem_ranks = 1
-options.external_memory_system = 0
-options.tlm_memory = 0
-options.elastic_trace_en = 0
+# currently not exposed as command-line args, set here for now
+args.mem_channels = 1
+args.mem_ranks = 1
+args.external_memory_system = 0
+args.tlm_memory = 0
+args.elastic_trace_en = 0
 
-MemConfig.config_mem(options, system)
+MemConfig.config_mem(args, system)
 
 # there is no point slowing things down by saving any data
 for ctrl in system.mem_ctrls:
@@ -218,7 +214,7 @@ def create_trace(filename, max_addr, burst_size, itt):
     filename = os.path.join(m5.options.outdir,
                             'lat_mem_rd%d.trc.gz' % nxt_range)
 
-    if not options.reuse_trace:
+    if not args.reuse_trace:
         # create the actual random trace for this range
         create_trace(filename, r, burst_size, itt)
 
diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py
index 8088091120..76548ea63d 100644
--- a/configs/dram/sweep.py
+++ b/configs/dram/sweep.py
@@ -34,7 +34,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import math
-import optparse
+import argparse
 
 import m5
 from m5.objects import *
@@ -51,7 +51,7 @@
 # and the sequential stride size (how many bytes per activate), and
 # observe what bus utilisation (bandwidth) is achieved
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 
 dram_generators = {
     "DRAM" : lambda x: x.createDram,
@@ -59,30 +59,26 @@
 }
 
 # Use a single-channel DDR3-1600 x64 (8x8 topology) by default
-parser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8",
-                  choices=ObjectList.mem_list.get_names(),
-                  help = "type of memory to use")
+parser.add_argument("--mem-type", default="DDR3_1600_8x8",
+                    choices=ObjectList.mem_list.get_names(),
+                    help = "type of memory to use")
 
-parser.add_option("--mem-ranks", "-r", type="int", default=1,
-                  help = "Number of ranks to iterate across")
+parser.add_argument("--mem-ranks", "-r", type=int, default=1,
+                    help = "Number of ranks to iterate across")
 
-parser.add_option("--rd_perc", type="int", default=100,
-                  help = "Percentage of read commands")
+parser.add_argument("--rd_perc", type=int, default=100,
+                    help = "Percentage of read commands")
 
-parser.add_option("--mode", type="choice", default="DRAM",
-                  choices=list(dram_generators.keys()),
-                  help = "DRAM: Random traffic; \
+parser.add_argument("--mode", default="DRAM",
+                    choices=list(dram_generators.keys()),
+                    help = "DRAM: Random traffic; \
                           DRAM_ROTATE: Traffic rotating across banks and ranks")
 
-parser.add_option("--addr-map", type="choice",
-                  choices=ObjectList.dram_addr_map_list.get_names(),
-                  default="RoRaBaCoCh", help = "DRAM address map policy")
+parser.add_argument("--addr-map",
+                    choices=ObjectList.dram_addr_map_list.get_names(),
+                    default="RoRaBaCoCh", help = "DRAM address map policy")
 
-(options, args) = parser.parse_args()
-
-if args:
-    print("Error: script doesn't take any positional arguments")
-    sys.exit(1)
+args = parser.parse_args()
 
 # at the moment we stay with the default open-adaptive page policy,
 # and address mapping
@@ -104,11 +100,11 @@
 
 # force a single channel to match the assumptions in the DRAM traffic
 # generator
-options.mem_channels = 1
-options.external_memory_system = 0
-options.tlm_memory = 0
-options.elastic_trace_en = 0
-MemConfig.config_mem(options, system)
+args.mem_channels = 1
+args.external_memory_system = 0
+args.tlm_memory = 0
+args.elastic_trace_en = 0
+MemConfig.config_mem(args, system)
 
 # the following assumes that we are using the native DRAM
 # controller, check to be sure
@@ -121,7 +117,7 @@
 system.mem_ctrls[0].dram.null = True
 
 # Set the address mapping based on input argument
-system.mem_ctrls[0].dram.addr_mapping = options.addr_map
+system.mem_ctrls[0].dram.addr_mapping = args.addr_map
 
 # stay in each state for 0.25 ms, long enough to warm things up, and
 # short enough to avoid hitting a refresh
@@ -178,16 +174,16 @@
 m5.instantiate()
 
 def trace():
-    addr_map = ObjectList.dram_addr_map_list.get(options.addr_map)
-    generator = dram_generators[options.mode](system.tgen)
+    addr_map = ObjectList.dram_addr_map_list.get(args.addr_map)
+    generator = dram_generators[args.mode](system.tgen)
     for stride_size in range(burst_size, max_stride + 1, burst_size):
         for bank in range(1, nbr_banks + 1):
             num_seq_pkts = int(math.ceil(float(stride_size) / burst_size))
             yield generator(period,
                             0, max_addr, burst_size, int(itt), int(itt),
-                            options.rd_perc, 0,
+                            args.rd_perc, 0,
                             num_seq_pkts, page_size, nbr_banks, bank,
-                            addr_map, options.mem_ranks)
+                            addr_map, args.mem_ranks)
     yield system.tgen.createExit(0)
 
 system.tgen.start(trace())
diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py
index baf936068b..dd060a3a64 100644
--- a/configs/example/apu_se.py
+++ b/configs/example/apu_se.py
@@ -29,7 +29,7 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
-import optparse, os, re, getpass
+import argparse, os, re, getpass
 import math
 import glob
 import inspect
@@ -69,118 +69,122 @@ def getOption(parser, opt_str):
     exec("return_value = parser.values.%s" % opt.dest)
     return return_value
 
+
 # Adding script options
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 Options.addSEOptions(parser)
 
-parser.add_option("--cpu-only-mode", action="store_true", default=False,
-                  help="APU mode. Used to take care of problems in "\
-                       "Ruby.py while running APU protocols")
-parser.add_option("-u", "--num-compute-units", type="int", default=4,
-                  help="number of GPU compute units"),
-parser.add_option("--num-cp", type="int", default=0,
-                  help="Number of GPU Command Processors (CP)")
-parser.add_option("--benchmark-root", help="Root of benchmark directory tree")
+parser.add_argument("--cpu-only-mode", action="store_true", default=False,
+                    help="APU mode. Used to take care of problems in "
+                    "Ruby.py while running APU protocols")
+parser.add_argument("-u", "--num-compute-units", type=int, default=4,
+                    help="number of GPU compute units")
+parser.add_argument("--num-cp", type=int, default=0,
+                    help="Number of GPU Command Processors (CP)")
+parser.add_argument("--benchmark-root",
+                    help="Root of benchmark directory tree")
 
 # not super important now, but to avoid putting the number 4 everywhere, make
 # it an option/knob
-parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs" \
-                  "sharing an SQC (icache, and thus icache TLB)")
-parser.add_option('--cu-per-scalar-cache', type='int', default=4,
-                  help='Number of CUs sharing a scalar cache')
-parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
-                  "per CU")
-parser.add_option('--cu-per-sa', type='int', default=4,
-                  help='Number of CUs per shader array. This must be a '
-                  'multiple of options.cu-per-sqc and options.cu-per-scalar')
-parser.add_option('--sa-per-complex', type='int', default=1,
-                  help='Number of shader arrays per complex')
-parser.add_option('--num-gpu-complexes', type='int', default=1,
-                  help='Number of GPU complexes')
-parser.add_option("--wf-size", type="int", default=64,
-                  help="Wavefront size(in workitems)")
-parser.add_option("--sp-bypass-path-length", type="int", default=4, \
-                  help="Number of stages of bypass path in vector ALU for "
-                  "Single Precision ops")
-parser.add_option("--dp-bypass-path-length", type="int", default=4, \
-                  help="Number of stages of bypass path in vector ALU for "
-                  "Double Precision ops")
+parser.add_argument("--cu-per-sqc", type=int, default=4, help="number of CUs "
+                    "sharing an SQC (icache, and thus icache TLB)")
+parser.add_argument('--cu-per-scalar-cache', type=int, default=4,
+                    help='Number of CUs sharing a scalar cache')
+parser.add_argument("--simds-per-cu", type=int, default=4, help="SIMD units "
+                    "per CU")
+parser.add_argument('--cu-per-sa', type=int, default=4,
+                    help='Number of CUs per shader array. This must be a '
+                    'multiple of --cu-per-sqc and --cu-per-scalar-cache')
+parser.add_argument('--sa-per-complex', type=int, default=1,
+                    help='Number of shader arrays per complex')
+parser.add_argument('--num-gpu-complexes', type=int, default=1,
+                    help='Number of GPU complexes')
+parser.add_argument("--wf-size", type=int, default=64,
+                    help="Wavefront size(in workitems)")
+parser.add_argument("--sp-bypass-path-length", type=int, default=4,
+                    help="Number of stages of bypass path in vector ALU for "
+                    "Single Precision ops")
+parser.add_argument("--dp-bypass-path-length", type=int, default=4,
+                    help="Number of stages of bypass path in vector ALU for "
+                    "Double Precision ops")
 # issue period per SIMD unit: number of cycles before issuing another vector
-parser.add_option("--issue-period", type="int", default=4, \
-                  help="Number of cycles per vector instruction issue period")
-parser.add_option("--glbmem-wr-bus-width", type="int", default=32, \
-                  help="VGPR to Coalescer (Global Memory) data bus width "
-                  "in bytes")
-parser.add_option("--glbmem-rd-bus-width", type="int", default=32, \
-                  help="Coalescer to VGPR (Global Memory) data bus width in "
-                  "bytes")
+parser.add_argument(
+    "--issue-period", type=int, default=4,
+    help="Number of cycles per vector instruction issue period")
+parser.add_argument("--glbmem-wr-bus-width", type=int, default=32,
+                    help="VGPR to Coalescer (Global Memory) data bus width "
+                    "in bytes")
+parser.add_argument("--glbmem-rd-bus-width", type=int, default=32,
+                    help="Coalescer to VGPR (Global Memory) data bus width in "
+                    "bytes")
 # Currently we only support 1 local memory pipe
-parser.add_option("--shr-mem-pipes-per-cu", type="int", default=1, \
-                  help="Number of Shared Memory pipelines per CU")
+parser.add_argument("--shr-mem-pipes-per-cu", type=int, default=1,
+                    help="Number of Shared Memory pipelines per CU")
 # Currently we only support 1 global memory pipe
-parser.add_option("--glb-mem-pipes-per-cu", type="int", default=1, \
-                  help="Number of Global Memory pipelines per CU")
-parser.add_option("--wfs-per-simd", type="int", default=10, help="Number of " \
-                  "WF slots per SIMD")
-
-parser.add_option("--registerManagerPolicy", type="string", default="static",
-                  help="Register manager policy")
-parser.add_option("--vreg-file-size", type="int", default=2048,
-                  help="number of physical vector registers per SIMD")
-parser.add_option("--vreg-min-alloc", type="int", default=4,
-                  help="Minimum number of registers that can be allocated "
-                  "from the VRF. The total number of registers will be "
-                  "aligned to this value.")
-
-parser.add_option("--sreg-file-size", type="int", default=2048,
-                  help="number of physical vector registers per SIMD")
-parser.add_option("--sreg-min-alloc", type="int", default=4,
-                  help="Minimum number of registers that can be allocated "
-                  "from the SRF. The total number of registers will be "
-                  "aligned to this value.")
-
-parser.add_option("--bw-scalor", type="int", default=0,
-                  help="bandwidth scalor for scalability analysis")
-parser.add_option("--CPUClock", type="string", default="2GHz",
-                  help="CPU clock")
-parser.add_option("--gpu-clock", type="string", default="1GHz",
-                  help="GPU clock")
-parser.add_option("--cpu-voltage", action="store", type="string",
-                  default='1.0V',
-                  help = """CPU  voltage domain""")
-parser.add_option("--gpu-voltage", action="store", type="string",
-                  default='1.0V',
-                  help = """CPU  voltage domain""")
-parser.add_option("--CUExecPolicy", type="string", default="OLDEST-FIRST",
-                  help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
-parser.add_option("--SegFaultDebug",action="store_true",
-                 help="checks for GPU seg fault before TLB access")
-parser.add_option("--FunctionalTLB",action="store_true",
-                 help="Assumes TLB has no latency")
-parser.add_option("--LocalMemBarrier",action="store_true",
-                 help="Barrier does not wait for writethroughs to complete")
-parser.add_option("--countPages", action="store_true",
-                 help="Count Page Accesses and output in per-CU output files")
-parser.add_option("--TLB-prefetch", type="int", help = "prefetch depth for"\
-                  "TLBs")
-parser.add_option("--pf-type", type="string", help="type of prefetch: "\
-                  "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
-parser.add_option("--pf-stride", type="int", help="set prefetch stride")
-parser.add_option("--numLdsBanks", type="int", default=32,
-                  help="number of physical banks per LDS module")
-parser.add_option("--ldsBankConflictPenalty", type="int", default=1,
-                  help="number of cycles per LDS bank conflict")
-parser.add_option("--lds-size", type="int", default=65536,
-                   help="Size of the LDS in bytes")
-parser.add_option('--fast-forward-pseudo-op', action='store_true',
-                  help = 'fast forward using kvm until the m5_switchcpu'
-                  ' pseudo-op is encountered, then switch cpus. subsequent'
-                  ' m5_switchcpu pseudo-ops will toggle back and forth')
-parser.add_option("--num-hw-queues", type="int", default=10,
-                  help="number of hw queues in packet processor")
-parser.add_option("--reg-alloc-policy",type="string", default="simple",
-                  help="register allocation policy (simple/dynamic)")
+parser.add_argument("--glb-mem-pipes-per-cu", type=int, default=1,
+                    help="Number of Global Memory pipelines per CU")
+parser.add_argument("--wfs-per-simd", type=int, default=10, help="Number of "
+                    "WF slots per SIMD")
+
+parser.add_argument("--registerManagerPolicy", type=str, default="static",
+                    help="Register manager policy")
+parser.add_argument("--vreg-file-size", type=int, default=2048,
+                    help="number of physical vector registers per SIMD")
+parser.add_argument("--vreg-min-alloc", type=int, default=4,
+                    help="Minimum number of registers that can be allocated "
+                    "from the VRF. The total number of registers will be "
+                    "aligned to this value.")
+
+parser.add_argument("--sreg-file-size", type=int, default=2048,
+                    help="number of physical scalar registers per SIMD")
+parser.add_argument("--sreg-min-alloc", type=int, default=4,
+                    help="Minimum number of registers that can be allocated "
+                    "from the SRF. The total number of registers will be "
+                    "aligned to this value.")
+
+parser.add_argument("--bw-scalor", type=int, default=0,
+                    help="bandwidth scalor for scalability analysis")
+parser.add_argument("--CPUClock", type=str, default="2GHz",
+                    help="CPU clock")
+parser.add_argument("--gpu-clock", type=str, default="1GHz",
+                    help="GPU clock")
+parser.add_argument("--cpu-voltage", action="store", type=str,
+                    default='1.0V',
+                    help="""CPU  voltage domain""")
+parser.add_argument("--gpu-voltage", action="store", type=str,
+                    default='1.0V',
+                    help="""GPU  voltage domain""")
+parser.add_argument("--CUExecPolicy", type=str, default="OLDEST-FIRST",
+                    help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
+parser.add_argument("--SegFaultDebug", action="store_true",
+                    help="checks for GPU seg fault before TLB access")
+parser.add_argument("--FunctionalTLB", action="store_true",
+                    help="Assumes TLB has no latency")
+parser.add_argument("--LocalMemBarrier", action="store_true",
+                    help="Barrier does not wait for writethroughs to complete")
+parser.add_argument(
+    "--countPages", action="store_true",
+    help="Count Page Accesses and output in per-CU output files")
+parser.add_argument("--TLB-prefetch", type=int, help="prefetch depth for "
+                    "TLBs")
+parser.add_argument("--pf-type", type=str, help="type of prefetch: "
+                    "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
+parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
+parser.add_argument("--numLdsBanks", type=int, default=32,
+                    help="number of physical banks per LDS module")
+parser.add_argument("--ldsBankConflictPenalty", type=int, default=1,
+                    help="number of cycles per LDS bank conflict")
+parser.add_argument("--lds-size", type=int, default=65536,
+                    help="Size of the LDS in bytes")
+parser.add_argument('--fast-forward-pseudo-op', action='store_true',
+                    help='fast forward using kvm until the m5_switchcpu'
+                    ' pseudo-op is encountered, then switch cpus. subsequent'
+                    ' m5_switchcpu pseudo-ops will toggle back and forth')
+parser.add_argument("--num-hw-queues", type=int, default=10,
+                    help="number of hw queues in packet processor")
+parser.add_argument("--reg-alloc-policy", type=str, default="simple",
+                    help="register allocation policy (simple/dynamic)")
 
 parser.add_option("--dgpu", action="store_true", default=False,
                   help="Configure the system as a dGPU instead of an APU. "
@@ -191,17 +195,17 @@ def getOption(parser, opt_str):
 
 Ruby.define_options(parser)
 
-#add TLB options to the parser
+# add TLB options to the parser
 GPUTLBOptions.tlb_options(parser)
 
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 # The GPU cache coherence protocols only work with the backing store
 setOption(parser, "--access-backing-store")
 
 # if benchmark root is specified explicitly, that overrides the search path
-if options.benchmark_root:
-    benchmark_path = [options.benchmark_root]
+if args.benchmark_root:
+    benchmark_path = [args.benchmark_root]
 else:
     # Set default benchmark search path to current dir
     benchmark_path = ['.']
@@ -213,34 +217,34 @@ def getOption(parser, opt_str):
     fatal("GPU model requires ruby")
 
 # Currently the gpu model requires only timing or detailed CPU
-if not (options.cpu_type == "TimingSimpleCPU" or
-   options.cpu_type == "DerivO3CPU"):
+if not (args.cpu_type == "TimingSimpleCPU" or
+   args.cpu_type == "DerivO3CPU"):
     fatal("GPU model requires TimingSimpleCPU or DerivO3CPU")
 
 # This file can support multiple compute units
-assert(options.num_compute_units >= 1)
+assert(args.num_compute_units >= 1)
 
 # Currently, the sqc (I-Cache of GPU) is shared by
 # multiple compute units(CUs). The protocol works just fine
 # even if sqc is not shared. Overriding this option here
 # so that the user need not explicitly set this (assuming
 # sharing sqc is the common usage)
-n_cu = options.num_compute_units
-num_sqc = int(math.ceil(float(n_cu) / options.cu_per_sqc))
-options.num_sqc = num_sqc # pass this to Ruby
-num_scalar_cache = int(math.ceil(float(n_cu) / options.cu_per_scalar_cache))
-options.num_scalar_cache = num_scalar_cache
+n_cu = args.num_compute_units
+num_sqc = int(math.ceil(float(n_cu) / args.cu_per_sqc))
+args.num_sqc = num_sqc # pass this to Ruby
+num_scalar_cache = int(math.ceil(float(n_cu) / args.cu_per_scalar_cache))
+args.num_scalar_cache = num_scalar_cache
 
 print('Num SQC = ', num_sqc, 'Num scalar caches = ', num_scalar_cache,
       'Num CU = ', n_cu)
 
 ########################## Creating the GPU system ########################
 # shader is the GPU
-shader = Shader(n_wf = options.wfs_per_simd,
+shader = Shader(n_wf = args.wfs_per_simd,
                 clk_domain = SrcClockDomain(
-                    clock = options.gpu_clock,
+                    clock = args.gpu_clock,
                     voltage_domain = VoltageDomain(
-                        voltage = options.gpu_voltage)))
+                        voltage = args.gpu_voltage)))
 
 # VIPER GPU protocol implements release consistency at GPU side. So,
 # we make their writes visible to the global memory and should read
@@ -259,83 +263,83 @@ def getOption(parser, opt_str):
 
 # Switching off per-lane TLB by default
 per_lane = False
-if options.TLB_config == "perLane":
+if args.TLB_config == "perLane":
     per_lane = True
 
 # List of compute units; one GPU can have multiple compute units
 compute_units = []
 for i in range(n_cu):
     compute_units.append(ComputeUnit(cu_id = i, perLaneTLB = per_lane,
-                                     num_SIMDs = options.simds_per_cu,
-                                     wf_size = options.wf_size,
+                                     num_SIMDs = args.simds_per_cu,
+                                     wf_size = args.wf_size,
                                      spbypass_pipe_length = \
-                                     options.sp_bypass_path_length,
+                                     args.sp_bypass_path_length,
                                      dpbypass_pipe_length = \
-                                     options.dp_bypass_path_length,
-                                     issue_period = options.issue_period,
+                                     args.dp_bypass_path_length,
+                                     issue_period = args.issue_period,
                                      coalescer_to_vrf_bus_width = \
-                                     options.glbmem_rd_bus_width,
+                                     args.glbmem_rd_bus_width,
                                      vrf_to_coalescer_bus_width = \
-                                     options.glbmem_wr_bus_width,
+                                     args.glbmem_wr_bus_width,
                                      num_global_mem_pipes = \
-                                     options.glb_mem_pipes_per_cu,
+                                     args.glb_mem_pipes_per_cu,
                                      num_shared_mem_pipes = \
-                                     options.shr_mem_pipes_per_cu,
-                                     n_wf = options.wfs_per_simd,
-                                     execPolicy = options.CUExecPolicy,
-                                     debugSegFault = options.SegFaultDebug,
-                                     functionalTLB = options.FunctionalTLB,
-                                     localMemBarrier = options.LocalMemBarrier,
-                                     countPages = options.countPages,
+                                     args.shr_mem_pipes_per_cu,
+                                     n_wf = args.wfs_per_simd,
+                                     execPolicy = args.CUExecPolicy,
+                                     debugSegFault = args.SegFaultDebug,
+                                     functionalTLB = args.FunctionalTLB,
+                                     localMemBarrier = args.LocalMemBarrier,
+                                     countPages = args.countPages,
                                      localDataStore = \
-                                     LdsState(banks = options.numLdsBanks,
+                                     LdsState(banks = args.numLdsBanks,
                                               bankConflictPenalty = \
-                                              options.ldsBankConflictPenalty,
-                                              size = options.lds_size)))
+                                              args.ldsBankConflictPenalty,
+                                              size = args.lds_size)))
     wavefronts = []
     vrfs = []
     vrf_pool_mgrs = []
     srfs = []
     srf_pool_mgrs = []
-    for j in range(options.simds_per_cu):
+    for j in range(args.simds_per_cu):
         for k in range(shader.n_wf):
             wavefronts.append(Wavefront(simdId = j, wf_slot_id = k,
-                                        wf_size = options.wf_size))
+                                        wf_size = args.wf_size))
 
-        if options.reg_alloc_policy == "simple":
+        if args.reg_alloc_policy == "simple":
             vrf_pool_mgrs.append(SimplePoolManager(pool_size = \
-                                               options.vreg_file_size,
+                                               args.vreg_file_size,
                                                min_alloc = \
-                                               options.vreg_min_alloc))
+                                               args.vreg_min_alloc))
             srf_pool_mgrs.append(SimplePoolManager(pool_size = \
-                                               options.sreg_file_size,
+                                               args.sreg_file_size,
                                                min_alloc = \
-                                               options.vreg_min_alloc))
-        elif options.reg_alloc_policy == "dynamic":
+                                               args.sreg_min_alloc))
+        elif args.reg_alloc_policy == "dynamic":
             vrf_pool_mgrs.append(DynPoolManager(pool_size = \
-                                               options.vreg_file_size,
+                                               args.vreg_file_size,
                                                min_alloc = \
-                                               options.vreg_min_alloc))
+                                               args.vreg_min_alloc))
             srf_pool_mgrs.append(DynPoolManager(pool_size = \
-                                               options.sreg_file_size,
+                                               args.sreg_file_size,
                                                min_alloc = \
-                                               options.vreg_min_alloc))
+                                               args.sreg_min_alloc))
 
-        vrfs.append(VectorRegisterFile(simd_id=j, wf_size=options.wf_size,
-                                       num_regs=options.vreg_file_size))
-        srfs.append(ScalarRegisterFile(simd_id=j, wf_size=options.wf_size,
-                                       num_regs=options.sreg_file_size))
+        vrfs.append(VectorRegisterFile(simd_id=j, wf_size=args.wf_size,
+                                       num_regs=args.vreg_file_size))
+        srfs.append(ScalarRegisterFile(simd_id=j, wf_size=args.wf_size,
+                                       num_regs=args.sreg_file_size))
 
     compute_units[-1].wavefronts = wavefronts
     compute_units[-1].vector_register_file = vrfs
     compute_units[-1].scalar_register_file = srfs
     compute_units[-1].register_manager = \
-        RegisterManager(policy=options.registerManagerPolicy,
+        RegisterManager(policy=args.registerManagerPolicy,
                         vrf_pool_managers=vrf_pool_mgrs,
                         srf_pool_managers=srf_pool_mgrs)
-    if options.TLB_prefetch:
-        compute_units[-1].prefetch_depth = options.TLB_prefetch
-        compute_units[-1].prefetch_prev_type = options.pf_type
+    if args.TLB_prefetch:
+        compute_units[-1].prefetch_depth = args.TLB_prefetch
+        compute_units[-1].prefetch_prev_type = args.pf_type
 
     # attach the LDS and the CU to the bus (actually a Bridge)
     compute_units[-1].ldsPort = compute_units[-1].ldsBus.slave
@@ -346,7 +350,7 @@ def getOption(parser, opt_str):
 
 ########################## Creating the CPU system ########################
 # The shader core will be whatever is after the CPU cores are accounted for
-shader_idx = options.num_cpus
+shader_idx = args.num_cpus
 
 # The command processor will be whatever is after the shader is accounted for
 cp_idx = shader_idx + 1
@@ -355,17 +359,17 @@ def getOption(parser, opt_str):
 # List of CPUs
 cpu_list = []
 
-CpuClass, mem_mode = Simulation.getCPUClass(options.cpu_type)
+CpuClass, mem_mode = Simulation.getCPUClass(args.cpu_type)
 if CpuClass == AtomicSimpleCPU:
     fatal("AtomicSimpleCPU is not supported")
 if mem_mode != 'timing':
     fatal("Only the timing memory mode is supported")
 shader.timing = True
 
-if options.fast_forward and options.fast_forward_pseudo_op:
+if args.fast_forward and args.fast_forward_pseudo_op:
     fatal("Cannot fast-forward based both on the number of instructions and"
           " on pseudo-ops")
-fast_forward = options.fast_forward or options.fast_forward_pseudo_op
+fast_forward = args.fast_forward or args.fast_forward_pseudo_op
 
 if fast_forward:
     FutureCpuClass, future_mem_mode = CpuClass, mem_mode
@@ -379,16 +383,16 @@ def getOption(parser, opt_str):
     future_cpu_list = []
 
     # Initial CPUs to be used during fast-forwarding.
-    for i in range(options.num_cpus):
+    for i in range(args.num_cpus):
         cpu = CpuClass(cpu_id = i,
                        clk_domain = SrcClockDomain(
-                           clock = options.CPUClock,
+                           clock = args.CPUClock,
                            voltage_domain = VoltageDomain(
-                               voltage = options.cpu_voltage)))
+                               voltage = args.cpu_voltage)))
         cpu_list.append(cpu)
 
-        if options.fast_forward:
-            cpu.max_insts_any_thread = int(options.fast_forward)
+        if args.fast_forward:
+            cpu.max_insts_any_thread = int(args.fast_forward)
 
 if fast_forward:
     MainCpuClass = FutureCpuClass
@@ -396,21 +400,21 @@ def getOption(parser, opt_str):
     MainCpuClass = CpuClass
 
 # CPs to be used throughout the simulation.
-for i in range(options.num_cp):
-    cp = MainCpuClass(cpu_id = options.num_cpus + i,
+for i in range(args.num_cp):
+    cp = MainCpuClass(cpu_id = args.num_cpus + i,
                       clk_domain = SrcClockDomain(
-                          clock = options.CPUClock,
+                          clock = args.CPUClock,
                           voltage_domain = VoltageDomain(
-                              voltage = options.cpu_voltage)))
+                              voltage = args.cpu_voltage)))
     cp_list.append(cp)
 
 # Main CPUs (to be used after fast-forwarding if fast-forwarding is specified).
-for i in range(options.num_cpus):
+for i in range(args.num_cpus):
     cpu = MainCpuClass(cpu_id = i,
                        clk_domain = SrcClockDomain(
-                           clock = options.CPUClock,
+                           clock = args.CPUClock,
                            voltage_domain = VoltageDomain(
-                               voltage = options.cpu_voltage)))
+                               voltage = args.cpu_voltage)))
     if fast_forward:
         cpu.switched_out = True
         future_cpu_list.append(cpu)
@@ -421,16 +425,16 @@ def getOption(parser, opt_str):
 
 hsapp_gpu_map_vaddr = 0x200000000
 hsapp_gpu_map_size = 0x1000
-hsapp_gpu_map_paddr = int(Addr(options.mem_size))
+hsapp_gpu_map_paddr = int(Addr(args.mem_size))
 
 # HSA kernel mode driver
-gpu_driver = GPUComputeDriver(filename = "kfd", isdGPU = options.dgpu)
+gpu_driver = GPUComputeDriver(filename = "kfd", isdGPU = args.dgpu)
 
 # Creating the GPU kernel launching components: that is the HSA
 # packet processor (HSAPP), GPU command processor (CP), and the
 # dispatcher.
 gpu_hsapp = HSAPacketProcessor(pioAddr=hsapp_gpu_map_paddr,
-                               numHWQueues=options.num_hw_queues)
+                               numHWQueues=args.num_hw_queues)
 dispatcher = GPUDispatcher()
 gpu_cmd_proc = GPUCommandProcessor(hsapp=gpu_hsapp,
                                    dispatcher=dispatcher)
@@ -453,15 +457,15 @@ def find_path(base_list, rel_path, test):
 def find_file(base_list, rel_path):
     return find_path(base_list, rel_path, os.path.isfile)
 
-executable = find_path(benchmark_path, options.cmd, os.path.exists)
+executable = find_path(benchmark_path, args.cmd, os.path.exists)
 # It's common for a benchmark to be in a directory with the same
 # name as the executable, so we handle that automatically
 if os.path.isdir(executable):
     benchmark_path = [executable]
-    executable = find_file(benchmark_path, options.cmd)
+    executable = find_file(benchmark_path, args.cmd)
 
-if options.env:
-    with open(options.env, 'r') as f:
+if args.env:
+    with open(args.env, 'r') as f:
         env = [line.rstrip() for line in f]
 else:
     env = ['LD_LIBRARY_PATH=%s' % ':'.join([
@@ -487,8 +491,8 @@ def find_file(base_list, rel_path):
            # vector copy kernels for dGPU memcopies to/from host and device.
            "HSA_ENABLE_SDMA=0"]
 
-process = Process(executable = executable, cmd = [options.cmd]
-                  + options.options.split(), drivers = [gpu_driver], env = env)
+process = Process(executable = executable, cmd = [args.cmd]
+                  + args.options.split(), drivers = [gpu_driver], env = env)
 
 for cpu in cpu_list:
     cpu.createThreads()
@@ -506,7 +510,7 @@ def find_file(base_list, rel_path):
 # List of CPUs that must be switched when moving between KVM and simulation
 if fast_forward:
     switch_cpu_list = \
-        [(cpu_list[i], future_cpu_list[i]) for i in range(options.num_cpus)]
+        [(cpu_list[i], future_cpu_list[i]) for i in range(args.num_cpus)]
 
 # Full list of processing cores in the system.
 cpu_list = cpu_list + [shader] + cp_list
@@ -514,14 +518,14 @@ def find_file(base_list, rel_path):
 # creating the overall system
 # notice the cpu list is explicitly added as a parameter to System
 system = System(cpu = cpu_list,
-                mem_ranges = [AddrRange(options.mem_size)],
-                cache_line_size = options.cacheline_size,
+                mem_ranges = [AddrRange(args.mem_size)],
+                cache_line_size = args.cacheline_size,
                 mem_mode = mem_mode,
                 workload = SEWorkload.init_compatible(executable))
 if fast_forward:
     system.future_cpu = future_cpu_list
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
-system.clk_domain = SrcClockDomain(clock =  options.sys_clock,
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
+system.clk_domain = SrcClockDomain(clock =  args.sys_clock,
                                    voltage_domain = system.voltage_domain)
 
 if fast_forward:
@@ -535,14 +539,14 @@ def find_file(base_list, rel_path):
         fatal("KvmCPU can only be used in SE mode with x86")
 
 # configure the TLB hierarchy
-GPUTLBConfig.config_tlb_hierarchy(options, system, shader_idx)
+GPUTLBConfig.config_tlb_hierarchy(args, system, shader_idx)
 
 # create Ruby system
 system.piobus = IOXBar(width=32, response_latency=0,
                        frontend_latency=0, forward_latency=0)
 dma_list = [gpu_hsapp, gpu_cmd_proc]
-Ruby.create_system(options, None, system, None, dma_list, None)
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+Ruby.create_system(args, None, system, None, dma_list, None)
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                     voltage_domain = system.voltage_domain)
 gpu_cmd_proc.pio = system.piobus.master
 gpu_hsapp.pio = system.piobus.master
@@ -551,7 +555,7 @@ def find_file(base_list, rel_path):
     exec('system.dma_cntrl%d.clk_domain = system.ruby.clk_domain' % i)
 
 # attach the CPU ports to Ruby
-for i in range(options.num_cpus):
+for i in range(args.num_cpus):
     ruby_port = system.ruby._cpu_ports[i]
 
     # Create interrupt controller
@@ -578,9 +582,9 @@ def find_file(base_list, rel_path):
 # per compute unit and one sequencer per SQC for the math to work out
 # correctly.
 gpu_port_idx = len(system.ruby._cpu_ports) \
-               - options.num_compute_units - options.num_sqc \
-               - options.num_scalar_cache
-gpu_port_idx = gpu_port_idx - options.num_cp * 2
+               - args.num_compute_units - args.num_sqc \
+               - args.num_scalar_cache
+gpu_port_idx = gpu_port_idx - args.num_cp * 2
 
 # Connect token ports. For this we need to search through the list of all
 # sequencers, since the TCP coalescers will not necessarily be first. Only
@@ -592,7 +596,7 @@ def find_file(base_list, rel_path):
             system.ruby._cpu_ports[i].gmTokenPort
         token_port_idx += 1
 
-wavefront_size = options.wf_size
+wavefront_size = args.wf_size
 for i in range(n_cu):
     # The pipeline issues wavefront_size number of uncoalesced requests
     # in one GPU issue cycle. Hence wavefront_size mem ports.
@@ -602,7 +606,7 @@ def find_file(base_list, rel_path):
     gpu_port_idx += 1
 
 for i in range(n_cu):
-    if i > 0 and not i % options.cu_per_sqc:
+    if i > 0 and not i % args.cu_per_sqc:
         print("incrementing idx on ", i)
         gpu_port_idx += 1
     system.cpu[shader_idx].CUs[i].sqc_port = \
@@ -610,7 +614,7 @@ def find_file(base_list, rel_path):
 gpu_port_idx = gpu_port_idx + 1
 
 for i in range(n_cu):
-    if i > 0 and not i % options.cu_per_scalar_cache:
+    if i > 0 and not i % args.cu_per_scalar_cache:
         print("incrementing idx on ", i)
         gpu_port_idx += 1
     system.cpu[shader_idx].CUs[i].scalar_port = \
@@ -618,7 +622,7 @@ def find_file(base_list, rel_path):
 gpu_port_idx = gpu_port_idx + 1
 
 # attach CP ports to Ruby
-for i in range(options.num_cp):
+for i in range(args.num_cp):
     system.cpu[cp_idx].createInterruptController()
     system.cpu[cp_idx].dcache_port = \
                 system.ruby._cpu_ports[gpu_port_idx + i * 2].slave
@@ -660,23 +664,23 @@ def find_file(base_list, rel_path):
 
 # Create the /sys/devices filesystem for the simulator so that the HSA Runtime
 # knows what type of GPU hardware we are simulating
-if options.dgpu:
-    hsaTopology.createFijiTopology(options)
+if args.dgpu:
+    hsaTopology.createFijiTopology(args)
 else:
-    hsaTopology.createCarrizoTopology(options)
+    hsaTopology.createCarrizoTopology(args)
 
 m5.ticks.setGlobalFrequency('1THz')
-if options.abs_max_tick:
-    maxtick = options.abs_max_tick
+if args.abs_max_tick:
+    maxtick = args.abs_max_tick
 else:
     maxtick = m5.MaxTick
 
 # Benchmarks support work item annotations
-Simulation.setWorkCountOptions(system, options)
+Simulation.setWorkCountOptions(system, args)
 
 # Checkpointing is not supported by APU model
-if (options.checkpoint_dir != None or
-    options.checkpoint_restore != None):
+if (args.checkpoint_dir != None or
+    args.checkpoint_restore != None):
     fatal("Checkpointing not supported by apu model")
 
 checkpoint_dir = None
@@ -685,18 +689,18 @@ def find_file(base_list, rel_path):
 # Map workload to this address space
 host_cpu.workload[0].map(0x10000000, 0x200000000, 4096)
 
-if options.fast_forward:
+if args.fast_forward:
     print("Switch at instruction count: %d" % cpu_list[0].max_insts_any_thread)
 
 exit_event = m5.simulate(maxtick)
 
-if options.fast_forward:
+if args.fast_forward:
     if exit_event.getCause() == "a thread reached the max instruction count":
         m5.switchCpus(system, switch_cpu_list)
         print("Switched CPUS @ tick %s" % (m5.curTick()))
         m5.stats.reset()
         exit_event = m5.simulate(maxtick - m5.curTick())
-elif options.fast_forward_pseudo_op:
+elif args.fast_forward_pseudo_op:
     while exit_event.getCause() == "switchcpu":
         # If we are switching *to* kvm, then the current stats are meaningful
         # Note that we don't do any warmup by default
diff --git a/configs/example/arm/ruby_fs.py b/configs/example/arm/ruby_fs.py
index 24f2097436..3783f3389e 100644
--- a/configs/example/arm/ruby_fs.py
+++ b/configs/example/arm/ruby_fs.py
@@ -38,7 +38,7 @@
 from m5.util import addToPath
 from m5.objects import *
 from m5.options import *
-import optparse
+import argparse
 
 m5.util.addToPath('../..')
 
@@ -80,19 +80,19 @@ def create_cow_image(name):
 
     return image
 
-def config_ruby(system, options):
+def config_ruby(system, args):
     cpus = []
     for cluster in system.cpu_cluster:
         for cpu in cluster.cpus:
             cpus.append(cpu)
 
-    Ruby.create_system(options, True, system, system.iobus,
+    Ruby.create_system(args, True, system, system.iobus,
                        system._dma_ports, system.realview.bootmem,
                        cpus)
 
     # Create a seperate clock domain for Ruby
     system.ruby.clk_domain = SrcClockDomain(
-        clock = options.ruby_clock,
+        clock = args.ruby_clock,
         voltage_domain = system.voltage_domain)
 
 def create(args):
@@ -195,69 +195,69 @@ def run(args):
 
 
 def main():
-    parser = optparse.OptionParser()
-
-    parser.add_option("--dtb", type=str, default=None,
-                      help="DTB file to load")
-    parser.add_option("--kernel", type=str, default=default_kernel,
-                      help="Linux kernel")
-    parser.add_option("--disk-image", type=str,
-                      default=default_disk,
-                      help="Disk to instantiate")
-    parser.add_option("--root-device", type=str,
-                      default=default_root_device,
-                      help="OS device name for root partition (default: {})"
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--dtb", type=str, default=None,
+                        help="DTB file to load")
+    parser.add_argument("--kernel", type=str, default=default_kernel,
+                        help="Linux kernel")
+    parser.add_argument("--disk-image", type=str,
+                        default=default_disk,
+                        help="Disk to instantiate")
+    parser.add_argument("--root-device", type=str,
+                        default=default_root_device,
+                        help="OS device name for root partition (default: {})"
                              .format(default_root_device))
-    parser.add_option("--script", type=str, default="",
-                      help = "Linux bootscript")
-    parser.add_option("--cpu", type="choice", choices=list(cpu_types.keys()),
-                      default="minor",
-                      help="CPU model to use")
-    parser.add_option("--cpu-freq", type=str, default="4GHz")
-    parser.add_option("-n", "--num-cpus", type="int", default=1)
-    parser.add_option("--checkpoint", action="store_true")
-    parser.add_option("--restore", type=str, default=None)
-
-    parser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8",
-                      choices=ObjectList.mem_list.get_names(),
-                      help = "type of memory to use")
-    parser.add_option("--mem-channels", type="int", default=1,
-                      help = "number of memory channels")
-    parser.add_option("--mem-ranks", type="int", default=None,
-                      help = "number of memory ranks per channel")
-    parser.add_option("--mem-size", action="store", type="string",
-                      default="2GiB",
-                      help="Specify the physical memory size (single memory)")
-    parser.add_option("--enable-dram-powerdown", action="store_true",
-                       help="Enable low-power states in DRAMInterface")
-    parser.add_option("--mem-channels-intlv", type="int", default=0,
-                      help="Memory channels interleave")
-
-    parser.add_option("--num-dirs", type="int", default=1)
-    parser.add_option("--num-l2caches", type="int", default=1)
-    parser.add_option("--num-l3caches", type="int", default=1)
-    parser.add_option("--l1d_size", type="string", default="64kB")
-    parser.add_option("--l1i_size", type="string", default="32kB")
-    parser.add_option("--l2_size", type="string", default="2MB")
-    parser.add_option("--l3_size", type="string", default="16MB")
-    parser.add_option("--l1d_assoc", type="int", default=2)
-    parser.add_option("--l1i_assoc", type="int", default=2)
-    parser.add_option("--l2_assoc", type="int", default=8)
-    parser.add_option("--l3_assoc", type="int", default=16)
-    parser.add_option("--cacheline_size", type="int", default=64)
+    parser.add_argument("--script", type=str, default="",
+                        help = "Linux bootscript")
+    parser.add_argument("--cpu", choices=list(cpu_types.keys()),
+                        default="minor",
+                        help="CPU model to use")
+    parser.add_argument("--cpu-freq", type=str, default="4GHz")
+    parser.add_argument("-n", "--num-cpus", type=int, default=1)
+    parser.add_argument("--checkpoint", action="store_true")
+    parser.add_argument("--restore", type=str, default=None)
+
+    parser.add_argument("--mem-type", default="DDR3_1600_8x8",
+                        choices=ObjectList.mem_list.get_names(),
+                        help = "type of memory to use")
+    parser.add_argument("--mem-channels", type=int, default=1,
+                        help = "number of memory channels")
+    parser.add_argument("--mem-ranks", type=int, default=None,
+                        help = "number of memory ranks per channel")
+    parser.add_argument(
+        "--mem-size", action="store", type=str, default="2GiB",
+        help="Specify the physical memory size (single memory)")
+    parser.add_argument("--enable-dram-powerdown", action="store_true",
+                        help="Enable low-power states in DRAMInterface")
+    parser.add_argument("--mem-channels-intlv", type=int, default=0,
+                        help="Memory channels interleave")
+
+    parser.add_argument("--num-dirs", type=int, default=1)
+    parser.add_argument("--num-l2caches", type=int, default=1)
+    parser.add_argument("--num-l3caches", type=int, default=1)
+    parser.add_argument("--l1d_size", type=str, default="64kB")
+    parser.add_argument("--l1i_size", type=str, default="32kB")
+    parser.add_argument("--l2_size", type=str, default="2MB")
+    parser.add_argument("--l3_size", type=str, default="16MB")
+    parser.add_argument("--l1d_assoc", type=int, default=2)
+    parser.add_argument("--l1i_assoc", type=int, default=2)
+    parser.add_argument("--l2_assoc", type=int, default=8)
+    parser.add_argument("--l3_assoc", type=int, default=16)
+    parser.add_argument("--cacheline_size", type=int, default=64)
 
     Ruby.define_options(parser)
-    (options, args) = parser.parse_args()
+    args = parser.parse_args()
 
     root = Root(full_system=True)
-    root.system = create(options)
+    root.system = create(args)
 
-    if options.restore is not None:
-        m5.instantiate(options.restore)
+    if args.restore is not None:
+        m5.instantiate(args.restore)
     else:
         m5.instantiate()
 
-    run(options)
+    run(args)
 
 
 if __name__ == "__m5_main__":
diff --git a/configs/example/etrace_replay.py b/configs/example/etrace_replay.py
index 9d752ee9df..f00aba1a6e 100644
--- a/configs/example/etrace_replay.py
+++ b/configs/example/etrace_replay.py
@@ -35,7 +35,7 @@
 
 # Basic elastic traces replay script that configures a Trace CPU
 
-import optparse
+import argparse
 
 from m5.util import addToPath, fatal
 
@@ -47,7 +47,7 @@
 from common import MemConfig
 from common.Caches import *
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 
 if '--ruby' in sys.argv:
@@ -55,37 +55,33 @@
     " because Trace CPU has been tested only with classic memory system")
     sys.exit(1)
 
-(options, args) = parser.parse_args()
-
-if args:
-    print("Error: script doesn't take any positional arguments")
-    sys.exit(1)
+args = parser.parse_args()
 
 numThreads = 1
 
-if options.cpu_type != "TraceCPU":
+if args.cpu_type != "TraceCPU":
     fatal("This is a script for elastic trace replay simulation, use "\
             "--cpu-type=TraceCPU\n");
 
-if options.num_cpus > 1:
+if args.num_cpus > 1:
     fatal("This script does not support multi-processor trace replay.\n")
 
 # In this case FutureClass will be None as there is not fast forwarding or
 # switching
-(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)
+(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
 CPUClass.numThreads = numThreads
 
 system = System(cpu = CPUClass(cpu_id=0),
                 mem_mode = test_mem_mode,
-                mem_ranges = [AddrRange(options.mem_size)],
-                cache_line_size = options.cacheline_size)
+                mem_ranges = [AddrRange(args.mem_size)],
+                cache_line_size = args.cacheline_size)
 
 # Create a top-level voltage domain
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 
 # Create a source clock for the system. This is used as the clock period for
 # xbar and memory
-system.clk_domain = SrcClockDomain(clock =  options.sys_clock,
+system.clk_domain = SrcClockDomain(clock =  args.sys_clock,
                                    voltage_domain = system.voltage_domain)
 
 # Create a CPU voltage domain
@@ -93,7 +89,7 @@
 
 # Create a separate clock domain for the CPUs. In case of Trace CPUs this clock
 # is actually used only by the caches connected to the CPU.
-system.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
+system.cpu_clk_domain = SrcClockDomain(clock = args.cpu_clock,
                                        voltage_domain =
                                        system.cpu_voltage_domain)
 
@@ -108,15 +104,15 @@
     cpu.createThreads()
 
 # Assign input trace files to the Trace CPU
-system.cpu.instTraceFile=options.inst_trace_file
-system.cpu.dataTraceFile=options.data_trace_file
+system.cpu.instTraceFile=args.inst_trace_file
+system.cpu.dataTraceFile=args.data_trace_file
 
-# Configure the classic memory system options
-MemClass = Simulation.setMemClass(options)
+# Configure the classic memory system from the parsed arguments
+MemClass = Simulation.setMemClass(args)
 system.membus = SystemXBar()
 system.system_port = system.membus.slave
-CacheConfig.config_cache(options, system)
-MemConfig.config_mem(options, system)
+CacheConfig.config_cache(args, system)
+MemConfig.config_mem(args, system)
 
 root = Root(full_system = False, system = system)
-Simulation.run(options, root, system, FutureClass)
+Simulation.run(args, root, system, FutureClass)
diff --git a/configs/example/fs.py b/configs/example/fs.py
index f388503e28..9f7b20de4b 100644
--- a/configs/example/fs.py
+++ b/configs/example/fs.py
@@ -39,7 +39,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import optparse
+import argparse
 import sys
 
 import m5
@@ -64,14 +64,14 @@
 from common import Options
 
 def cmd_line_template():
-    if options.command_line and options.command_line_file:
+    if args.command_line and args.command_line_file:
         print("Error: --command-line and --command-line-file are "
               "mutually exclusive")
         sys.exit(1)
-    if options.command_line:
-        return options.command_line
-    if options.command_line_file:
-        return open(options.command_line_file).read().strip()
+    if args.command_line:
+        return args.command_line
+    if args.command_line_file:
+        return open(args.command_line_file).read().strip()
     return None
 
 def build_test_system(np):
@@ -84,61 +84,61 @@ def build_test_system(np):
         test_sys = makeBareMetalRiscvSystem(test_mem_mode, bm[0],
                                             cmdline=cmdline)
     elif buildEnv['TARGET_ISA'] == "x86":
-        test_sys = makeLinuxX86System(test_mem_mode, np, bm[0], options.ruby,
+        test_sys = makeLinuxX86System(test_mem_mode, np, bm[0], args.ruby,
                                       cmdline=cmdline)
     elif buildEnv['TARGET_ISA'] == "arm":
         test_sys = makeArmSystem(
             test_mem_mode,
-            options.machine_type,
+            args.machine_type,
             np,
             bm[0],
-            options.dtb_filename,
-            bare_metal=options.bare_metal,
+            args.dtb_filename,
+            bare_metal=args.bare_metal,
             cmdline=cmdline,
-            external_memory=options.external_memory_system,
-            ruby=options.ruby,
-            security=options.enable_security_extensions,
-            vio_9p=options.vio_9p,
-            bootloader=options.bootloader,
+            external_memory=args.external_memory_system,
+            ruby=args.ruby,
+            security=args.enable_security_extensions,
+            vio_9p=args.vio_9p,
+            bootloader=args.bootloader,
         )
-        if options.enable_context_switch_stats_dump:
+        if args.enable_context_switch_stats_dump:
             test_sys.enable_context_switch_stats_dump = True
     else:
         fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA'])
 
     # Set the cache line size for the entire system
-    test_sys.cache_line_size = options.cacheline_size
+    test_sys.cache_line_size = args.cacheline_size
 
     # Create a top-level voltage domain
-    test_sys.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+    test_sys.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 
     # Create a source clock for the system and set the clock period
-    test_sys.clk_domain = SrcClockDomain(clock =  options.sys_clock,
+    test_sys.clk_domain = SrcClockDomain(clock =  args.sys_clock,
             voltage_domain = test_sys.voltage_domain)
 
     # Create a CPU voltage domain
     test_sys.cpu_voltage_domain = VoltageDomain()
 
     # Create a source clock for the CPUs and set the clock period
-    test_sys.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
+    test_sys.cpu_clk_domain = SrcClockDomain(clock = args.cpu_clock,
                                              voltage_domain =
                                              test_sys.cpu_voltage_domain)
 
     if buildEnv['TARGET_ISA'] == 'riscv':
-        test_sys.workload.bootloader = options.kernel
-    elif options.kernel is not None:
-        test_sys.workload.object_file = binary(options.kernel)
+        test_sys.workload.bootloader = args.kernel
+    elif args.kernel is not None:
+        test_sys.workload.object_file = binary(args.kernel)
 
-    if options.script is not None:
-        test_sys.readfile = options.script
+    if args.script is not None:
+        test_sys.readfile = args.script
 
-    if options.lpae:
+    if args.lpae:
         test_sys.have_lpae = True
 
-    if options.virtualisation:
+    if args.virtualisation:
         test_sys.have_virtualization = True
 
-    test_sys.init_param = options.init_param
+    test_sys.init_param = args.init_param
 
     # For now, assign all the CPUs to the same clock domain
     test_sys.cpu = [TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i)
@@ -148,13 +148,13 @@ def build_test_system(np):
         ObjectList.is_kvm_cpu(FutureClass):
         test_sys.kvm_vm = KvmVM()
 
-    if options.ruby:
+    if args.ruby:
         bootmem = getattr(test_sys, '_bootmem', None)
-        Ruby.create_system(options, True, test_sys, test_sys.iobus,
+        Ruby.create_system(args, True, test_sys, test_sys.iobus,
                            test_sys._dma_ports, bootmem)
 
         # Create a seperate clock domain for Ruby
-        test_sys.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+        test_sys.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                         voltage_domain = test_sys.voltage_domain)
 
         # Connect the ruby io port to the PIO bus,
@@ -172,35 +172,35 @@ def build_test_system(np):
             test_sys.ruby._cpu_ports[i].connectCpuPorts(cpu)
 
     else:
-        if options.caches or options.l2cache:
+        if args.caches or args.l2cache:
             # By default the IOCache runs at the system clock
             test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges)
             test_sys.iocache.cpu_side = test_sys.iobus.master
             test_sys.iocache.mem_side = test_sys.membus.slave
-        elif not options.external_memory_system:
+        elif not args.external_memory_system:
             test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges)
             test_sys.iobridge.slave = test_sys.iobus.master
             test_sys.iobridge.master = test_sys.membus.slave
 
         # Sanity check
-        if options.simpoint_profile:
+        if args.simpoint_profile:
             if not ObjectList.is_noncaching_cpu(TestCPUClass):
                 fatal("SimPoint generation should be done with atomic cpu")
             if np > 1:
                 fatal("SimPoint generation not supported with more than one CPUs")
 
         for i in range(np):
-            if options.simpoint_profile:
-                test_sys.cpu[i].addSimPointProbe(options.simpoint_interval)
-            if options.checker:
+            if args.simpoint_profile:
+                test_sys.cpu[i].addSimPointProbe(args.simpoint_interval)
+            if args.checker:
                 test_sys.cpu[i].addCheckerCpu()
             if not ObjectList.is_kvm_cpu(TestCPUClass):
-                if options.bp_type:
-                    bpClass = ObjectList.bp_list.get(options.bp_type)
+                if args.bp_type:
+                    bpClass = ObjectList.bp_list.get(args.bp_type)
                     test_sys.cpu[i].branchPred = bpClass()
-                if options.indirect_bp_type:
+                if args.indirect_bp_type:
                     IndirectBPClass = ObjectList.indirect_bp_list.get(
-                        options.indirect_bp_type)
+                        args.indirect_bp_type)
                     test_sys.cpu[i].branchPred.indirectBranchPred = \
                         IndirectBPClass()
             test_sys.cpu[i].createThreads()
@@ -212,13 +212,13 @@ def build_test_system(np):
         # If restoring from checkpoint or fast forwarding, the code that does this for
         # FutureCPUClass is in the Simulation module. If the check passes then the
         # elastic trace probe is attached to the switch CPUs.
-        if options.elastic_trace_en and options.checkpoint_restore == None and \
-            not options.fast_forward:
-            CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, options)
+        if args.elastic_trace_en and args.checkpoint_restore == None and \
+            not args.fast_forward:
+            CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, args)
 
-        CacheConfig.config_cache(options, test_sys)
+        CacheConfig.config_cache(args, test_sys)
 
-        MemConfig.config_mem(options, test_sys)
+        MemConfig.config_mem(args, test_sys)
 
     return test_sys
 
@@ -238,21 +238,21 @@ def build_drive_system(np):
         drive_sys = makeLinuxX86System(drive_mem_mode, np, bm[1],
                                        cmdline=cmdline)
     elif buildEnv['TARGET_ISA'] == 'arm':
-        drive_sys = makeArmSystem(drive_mem_mode, options.machine_type, np,
-                                  bm[1], options.dtb_filename, cmdline=cmdline)
+        drive_sys = makeArmSystem(drive_mem_mode, args.machine_type, np,
+                                  bm[1], args.dtb_filename, cmdline=cmdline)
 
     # Create a top-level voltage domain
-    drive_sys.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+    drive_sys.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 
     # Create a source clock for the system and set the clock period
-    drive_sys.clk_domain = SrcClockDomain(clock =  options.sys_clock,
+    drive_sys.clk_domain = SrcClockDomain(clock =  args.sys_clock,
             voltage_domain = drive_sys.voltage_domain)
 
     # Create a CPU voltage domain
     drive_sys.cpu_voltage_domain = VoltageDomain()
 
     # Create a source clock for the CPUs and set the clock period
-    drive_sys.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
+    drive_sys.cpu_clk_domain = SrcClockDomain(clock = args.cpu_clock,
                                               voltage_domain =
                                               drive_sys.cpu_voltage_domain)
 
@@ -261,8 +261,8 @@ def build_drive_system(np):
     drive_sys.cpu.createThreads()
     drive_sys.cpu.createInterruptController()
     drive_sys.cpu.connectAllPorts(drive_sys.membus)
-    if options.kernel is not None:
-        drive_sys.workload.object_file = binary(options.kernel)
+    if args.kernel is not None:
+        drive_sys.workload.object_file = binary(args.kernel)
 
     if ObjectList.is_kvm_cpu(DriveCPUClass):
         drive_sys.kvm_vm = KvmVM()
@@ -279,81 +279,77 @@ def build_drive_system(np):
     for i in range(len(drive_sys.mem_ctrls)):
         drive_sys.mem_ctrls[i].port = drive_sys.membus.master
 
-    drive_sys.init_param = options.init_param
+    drive_sys.init_param = args.init_param
 
     return drive_sys
 
-# Add options
-parser = optparse.OptionParser()
+# Add command-line arguments
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 Options.addFSOptions(parser)
 
-# Add the ruby specific and protocol specific options
+# Add the Ruby-specific and protocol-specific arguments
 if '--ruby' in sys.argv:
     Ruby.define_options(parser)
 
-(options, args) = parser.parse_args()
-
-if args:
-    print("Error: script doesn't take any positional arguments")
-    sys.exit(1)
+args = parser.parse_args()
 
 # system under test can be any CPU
-(TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)
+(TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
 
 # Match the memories with the CPUs, based on the options for the test system
-TestMemClass = Simulation.setMemClass(options)
+TestMemClass = Simulation.setMemClass(args)
 
-if options.benchmark:
+if args.benchmark:
     try:
-        bm = Benchmarks[options.benchmark]
+        bm = Benchmarks[args.benchmark]
     except KeyError:
-        print("Error benchmark %s has not been defined." % options.benchmark)
+        print("Error benchmark %s has not been defined." % args.benchmark)
         print("Valid benchmarks are: %s" % DefinedBenchmarks)
         sys.exit(1)
 else:
-    if options.dual:
-        bm = [SysConfig(disks=options.disk_image, rootdev=options.root_device,
-                        mem=options.mem_size, os_type=options.os_type),
-              SysConfig(disks=options.disk_image, rootdev=options.root_device,
-                        mem=options.mem_size, os_type=options.os_type)]
+    if args.dual:
+        bm = [SysConfig(disks=args.disk_image, rootdev=args.root_device,
+                        mem=args.mem_size, os_type=args.os_type),
+              SysConfig(disks=args.disk_image, rootdev=args.root_device,
+                        mem=args.mem_size, os_type=args.os_type)]
     else:
-        bm = [SysConfig(disks=options.disk_image, rootdev=options.root_device,
-                        mem=options.mem_size, os_type=options.os_type)]
+        bm = [SysConfig(disks=args.disk_image, rootdev=args.root_device,
+                        mem=args.mem_size, os_type=args.os_type)]
 
-np = options.num_cpus
+np = args.num_cpus
 
 test_sys = build_test_system(np)
 if len(bm) == 2:
     drive_sys = build_drive_system(np)
-    root = makeDualRoot(True, test_sys, drive_sys, options.etherdump)
-elif len(bm) == 1 and options.dist:
+    root = makeDualRoot(True, test_sys, drive_sys, args.etherdump)
+elif len(bm) == 1 and args.dist:
     # This system is part of a dist-gem5 simulation
     root = makeDistRoot(test_sys,
-                        options.dist_rank,
-                        options.dist_size,
-                        options.dist_server_name,
-                        options.dist_server_port,
-                        options.dist_sync_repeat,
-                        options.dist_sync_start,
-                        options.ethernet_linkspeed,
-                        options.ethernet_linkdelay,
-                        options.etherdump);
+                        args.dist_rank,
+                        args.dist_size,
+                        args.dist_server_name,
+                        args.dist_server_port,
+                        args.dist_sync_repeat,
+                        args.dist_sync_start,
+                        args.ethernet_linkspeed,
+                        args.ethernet_linkdelay,
+                        args.etherdump);
 elif len(bm) == 1:
     root = Root(full_system=True, system=test_sys)
 else:
     print("Error I don't know how to create more than 2 systems.")
     sys.exit(1)
 
-if options.timesync:
+if args.timesync:
     root.time_sync_enable = True
 
-if options.frame_capture:
+if args.frame_capture:
     VncServer.frame_capture = True
 
-if buildEnv['TARGET_ISA'] == "arm" and not options.bare_metal \
-        and not options.dtb_filename:
-    if options.machine_type not in ["VExpress_GEM5",
+if buildEnv['TARGET_ISA'] == "arm" and not args.bare_metal \
+        and not args.dtb_filename:
+    if args.machine_type not in ["VExpress_GEM5",
                                     "VExpress_GEM5_V1",
                                     "VExpress_GEM5_V2",
                                     "VExpress_GEM5_Foundation"]:
@@ -369,5 +365,5 @@ def build_drive_system(np):
                 os.path.join(m5.options.outdir, '%s.dtb' % sysname)
             sys.generateDtb(sys.workload.dtb_filename)
 
-Simulation.setWorkCountOptions(test_sys, options)
-Simulation.run(options, root, test_sys, FutureClass)
+Simulation.setWorkCountOptions(test_sys, args)
+Simulation.run(args, root, test_sys, FutureClass)
diff --git a/configs/example/garnet_synth_traffic.py b/configs/example/garnet_synth_traffic.py
index 2c74398146..3947eeb561 100644
--- a/configs/example/garnet_synth_traffic.py
+++ b/configs/example/garnet_synth_traffic.py
@@ -30,7 +30,7 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys
 
 addToPath('../')
 
@@ -42,41 +42,42 @@
 config_root = os.path.dirname(config_path)
 m5_root = os.path.dirname(config_root)
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addNoISAOptions(parser)
 
-parser.add_option("--synthetic", type="choice", default="uniform_random",
-                  choices=['uniform_random', 'tornado', 'bit_complement', \
-                           'bit_reverse', 'bit_rotation', 'neighbor', \
-                            'shuffle', 'transpose'])
+parser.add_argument("--synthetic", default="uniform_random",
+                    choices=['uniform_random', 'tornado', 'bit_complement', \
+                             'bit_reverse', 'bit_rotation', 'neighbor', \
+                             'shuffle', 'transpose'])
 
-parser.add_option("-i", "--injectionrate", type="float", default=0.1,
-                  metavar="I",
-                  help="Injection rate in packets per cycle per node. \
+parser.add_argument("-i", "--injectionrate", type=float, default=0.1,
+                    metavar="I",
+                    help="Injection rate in packets per cycle per node. \
                         Takes decimal value between 0 to 1 (eg. 0.225). \
                         Number of digits after 0 depends upon --precision.")
 
-parser.add_option("--precision", type="int", default=3,
-                  help="Number of digits of precision after decimal point\
+parser.add_argument("--precision", type=int, default=3,
+                    help="Number of digits of precision after decimal point\
                         for injection rate")
 
-parser.add_option("--sim-cycles", type="int", default=1000,
-                   help="Number of simulation cycles")
+parser.add_argument("--sim-cycles", type=int, default=1000,
+                    help="Number of simulation cycles")
 
-parser.add_option("--num-packets-max", type="int", default=-1,
-                  help="Stop injecting after --num-packets-max.\
+parser.add_argument("--num-packets-max", type=int, default=-1,
+                    help="Stop injecting after --num-packets-max.\
                         Set to -1 to disable.")
 
-parser.add_option("--single-sender-id", type="int", default=-1,
-                  help="Only inject from this sender.\
+parser.add_argument("--single-sender-id", type=int, default=-1,
+                    help="Only inject from this sender.\
                         Set to -1 to disable.")
 
-parser.add_option("--single-dest-id", type="int", default=-1,
-                  help="Only send to this destination.\
+parser.add_argument("--single-dest-id", type=int, default=-1,
+                    help="Only send to this destination.\
                         Set to -1 to disable.")
 
-parser.add_option("--inj-vnet", type="int", default=-1,
-                  help="Only inject in this vnet (0, 1 or 2).\
+parser.add_argument("--inj-vnet", type=int, default=-1,
+                    choices=[-1,0,1,2],
+                    help="Only inject in this vnet (0, 1 or 2).\
                         0 and 1 are 1-flit, 2 is 5-flit.\
                         Set to -1 to inject randomly in all vnets.")
 
@@ -85,45 +86,34 @@
 #
 Ruby.define_options(parser)
 
-(options, args) = parser.parse_args()
-
-if args:
-     print("Error: script doesn't take any positional arguments")
-     sys.exit(1)
-
-
-if options.inj_vnet > 2:
-    print("Error: Injection vnet %d should be 0 (1-flit), 1 (1-flit) "
-          "or 2 (5-flit) or -1 (random)" % (options.inj_vnet))
-    sys.exit(1)
-
+args = parser.parse_args()
 
 cpus = [ GarnetSyntheticTraffic(
-                     num_packets_max=options.num_packets_max,
-                     single_sender=options.single_sender_id,
-                     single_dest=options.single_dest_id,
-                     sim_cycles=options.sim_cycles,
-                     traffic_type=options.synthetic,
-                     inj_rate=options.injectionrate,
-                     inj_vnet=options.inj_vnet,
-                     precision=options.precision,
-                     num_dest=options.num_dirs) \
-         for i in range(options.num_cpus) ]
+                     num_packets_max=args.num_packets_max,
+                     single_sender=args.single_sender_id,
+                     single_dest=args.single_dest_id,
+                     sim_cycles=args.sim_cycles,
+                     traffic_type=args.synthetic,
+                     inj_rate=args.injectionrate,
+                     inj_vnet=args.inj_vnet,
+                     precision=args.precision,
+                     num_dest=args.num_dirs) \
+         for i in range(args.num_cpus) ]
 
 # create the desired simulated system
-system = System(cpu = cpus, mem_ranges = [AddrRange(options.mem_size)])
+system = System(cpu = cpus, mem_ranges = [AddrRange(args.mem_size)])
 
 
 # Create a top-level voltage domain and clock domain
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 
-system.clk_domain = SrcClockDomain(clock = options.sys_clock,
+system.clk_domain = SrcClockDomain(clock = args.sys_clock,
                                    voltage_domain = system.voltage_domain)
 
-Ruby.create_system(options, False, system)
+Ruby.create_system(args, False, system)
 
 # Create a seperate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                         voltage_domain = system.voltage_domain)
 
 i = 0
@@ -148,6 +138,6 @@
 m5.instantiate()
 
 # simulate until program terminates
-exit_event = m5.simulate(options.abs_max_tick)
+exit_event = m5.simulate(args.abs_max_tick)
 
 print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
diff --git a/configs/example/memcheck.py b/configs/example/memcheck.py
index 2de45efa28..c959632afa 100644
--- a/configs/example/memcheck.py
+++ b/configs/example/memcheck.py
@@ -36,26 +36,28 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import optparse
+import argparse
 import random
 import sys
 
 import m5
 from m5.objects import *
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser(
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
-parser.add_option("-a", "--atomic", action="store_true",
-                  help="Use atomic (non-timing) mode")
-parser.add_option("-b", "--blocking", action="store_true",
-                  help="Use blocking caches")
-parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
-                  metavar="T",
-                  help="Stop after T ticks")
-parser.add_option("-p", "--prefetchers", action="store_true",
-                  help="Use prefetchers")
-parser.add_option("-s", "--stridepref", action="store_true",
-                  help="Use strided prefetchers")
+
+parser.add_argument("-a", "--atomic", action="store_true",
+                    help="Use atomic (non-timing) mode")
+parser.add_argument("-b", "--blocking", action="store_true",
+                    help="Use blocking caches")
+parser.add_argument("-m", "--maxtick", type=int, default=m5.MaxTick,
+                    metavar="T",
+                    help="Stop after T ticks")
+parser.add_argument("-p", "--prefetchers", action="store_true",
+                    help="Use prefetchers")
+parser.add_argument("-s", "--stridepref", action="store_true",
+                    help="Use strided prefetchers")
 
 # This example script has a lot in common with the memtest.py in that
 # it is designed to stress tests the memory system. However, this
@@ -87,30 +89,24 @@
 # and linear address streams to ensure that the prefetchers will
 # trigger. By default prefetchers are off.
 
-parser.add_option("-c", "--caches", type="string", default="3:2",
-                  help="Colon-separated cache hierarchy specification, "
-                  "see script comments for details "
-                  "[default: %default]")
-parser.add_option("-t", "--testers", type="string", default="1:0:2",
-                  help="Colon-separated tester hierarchy specification, "
-                  "see script comments for details "
-                  "[default: %default]")
-parser.add_option("-r", "--random", action="store_true",
-                  help="Generate a random tree topology")
-parser.add_option("--sys-clock", action="store", type="string",
-                  default='1GHz',
-                  help = """Top-level clock for blocks running at system
+parser.add_argument("-c", "--caches", type=str, default="3:2",
+                    help="Colon-separated cache hierarchy specification, "
+                    "see script comments for details ")
+parser.add_argument("-t", "--testers", type=str, default="1:0:2",
+                    help="Colon-separated tester hierarchy specification, "
+                    "see script comments for details ")
+parser.add_argument("-r", "--random", action="store_true",
+                    help="Generate a random tree topology")
+parser.add_argument("--sys-clock", action="store", type=str,
+                    default='1GHz',
+                    help = """Top-level clock for blocks running at system
                   speed""")
 
-(options, args) = parser.parse_args()
-
-if args:
-     print("Error: script doesn't take any positional arguments")
-     sys.exit(1)
+args = parser.parse_args()
 
-# Start by parsing the command line options and do some basic sanity
+# Start by parsing the command-line arguments and do some basic sanity
 # checking
-if options.random:
+if args.random:
      # Generate a tree with a valid number of testers
      tree_depth = random.randint(1, 4)
      cachespec = [random.randint(1, 3) for i in range(tree_depth)]
@@ -119,8 +115,8 @@
          "-t", ':'.join(map(str, testerspec)))
 else:
      try:
-          cachespec = [int(x) for x in options.caches.split(':')]
-          testerspec = [int(x) for x in options.testers.split(':')]
+          cachespec = [int(x) for x in args.caches.split(':')]
+          testerspec = [int(x) for x in args.testers.split(':')]
      except:
           print("Error: Unable to parse caches or testers option")
           sys.exit(1)
@@ -164,14 +160,14 @@
                  tag_latency = 1, data_latency = 1, response_latency = 1,
                  tgts_per_mshr = 8)
 
-if options.blocking:
+if args.blocking:
      proto_l1.mshrs = 1
 else:
      proto_l1.mshrs = 4
 
-if options.prefetchers:
+if args.prefetchers:
      proto_l1.prefetcher = TaggedPrefetcher()
-elif options.stridepref:
+elif args.stridepref:
      proto_l1.prefetcher = StridePrefetcher()
 
 cache_proto = [proto_l1]
@@ -218,7 +214,7 @@
 
 system.voltage_domain = VoltageDomain(voltage = '1V')
 
-system.clk_domain = SrcClockDomain(clock =  options.sys_clock,
+system.clk_domain = SrcClockDomain(clock =  args.sys_clock,
                         voltage_domain = system.voltage_domain)
 
 system.memchecker = MemChecker()
@@ -298,7 +294,7 @@ def make_cache_level(ncaches, prototypes, level, next_cache):
 last_subsys.xbar.point_of_coherency = True
 
 root = Root(full_system = False, system = system)
-if options.atomic:
+if args.atomic:
     root.system.mem_mode = 'atomic'
 else:
     root.system.mem_mode = 'timing'
@@ -311,6 +307,6 @@ def make_cache_level(ncaches, prototypes, level, next_cache):
 m5.instantiate()
 
 # Simulate until program terminates
-exit_event = m5.simulate(options.maxtick)
+exit_event = m5.simulate(args.maxtick)
 
 print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 31530484ab..3c252dc41e 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -36,7 +36,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import optparse
+import argparse
 import random
 import sys
 
@@ -51,17 +51,18 @@
 # arbitrarily deep cache hierarchies, sharing or no sharing of caches,
 # and testers not only at the L1s, but also at the L2s, L3s etc.
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser(
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
-parser.add_option("-a", "--atomic", action="store_true",
-                  help="Use atomic (non-timing) mode")
-parser.add_option("-b", "--blocking", action="store_true",
-                  help="Use blocking caches")
-parser.add_option("-l", "--maxloads", metavar="N", default=0,
-                  help="Stop after N loads")
-parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
-                  metavar="T",
-                  help="Stop after T ticks")
+parser.add_argument("-a", "--atomic", action="store_true",
+                    help="Use atomic (non-timing) mode")
+parser.add_argument("-b", "--blocking", action="store_true",
+                    help="Use blocking caches")
+parser.add_argument("-l", "--maxloads", metavar="N", default=0,
+                    help="Stop after N loads")
+parser.add_argument("-m", "--maxtick", type=int, default=m5.MaxTick,
+                    metavar="T",
+                    help="Stop after T ticks")
 
 # The tree specification consists of two colon-separated lists of one
 # or more integers, one for the caches, and one for the testers. The
@@ -74,40 +75,31 @@
 # cache string as there should always be testers attached to the
 # uppermost caches.
 
-parser.add_option("-c", "--caches", type="string", default="2:2:1",
-                  help="Colon-separated cache hierarchy specification, "
-                  "see script comments for details "
-                  "[default: %default]")
-parser.add_option("--noncoherent-cache", action="store_true",
-                  help="Adds a non-coherent, last-level cache")
-parser.add_option("-t", "--testers", type="string", default="1:1:0:2",
-                  help="Colon-separated tester hierarchy specification, "
-                  "see script comments for details "
-                  "[default: %default]")
-parser.add_option("-f", "--functional", type="int", default=10,
-                  metavar="PCT",
-                  help="Target percentage of functional accesses "
-                  "[default: %default]")
-parser.add_option("-u", "--uncacheable", type="int", default=10,
-                  metavar="PCT",
-                  help="Target percentage of uncacheable accesses "
-                  "[default: %default]")
-parser.add_option("-r", "--random", action="store_true",
-                  help="Generate a random tree topology")
-parser.add_option("--progress", type="int", default=100000,
-                  metavar="NLOADS",
-                  help="Progress message interval "
-                  "[default: %default]")
-parser.add_option("--sys-clock", action="store", type="string",
-                  default='1GHz',
-                  help = """Top-level clock for blocks running at system
+parser.add_argument("-c", "--caches", type=str, default="2:2:1",
+                    help="Colon-separated cache hierarchy specification, "
+                    "see script comments for details ")
+parser.add_argument("--noncoherent-cache", action="store_true",
+                    help="Adds a non-coherent, last-level cache")
+parser.add_argument("-t", "--testers", type=str, default="1:1:0:2",
+                    help="Colon-separated tester hierarchy specification, "
+                    "see script comments for details ")
+parser.add_argument("-f", "--functional", type=int, default=10,
+                    metavar="PCT",
+                    help="Target percentage of functional accesses ")
+parser.add_argument("-u", "--uncacheable", type=int, default=10,
+                    metavar="PCT",
+                    help="Target percentage of uncacheable accesses ")
+parser.add_argument("-r", "--random", action="store_true",
+                    help="Generate a random tree topology")
+parser.add_argument("--progress", type=int, default=100000,
+                    metavar="NLOADS",
+                    help="Progress message interval ")
+parser.add_argument("--sys-clock", action="store", type=str,
+                    default='1GHz',
+                    help="""Top-level clock for blocks running at system
                   speed""")
 
-(options, args) = parser.parse_args()
-
-if args:
-     print("Error: script doesn't take any positional arguments")
-     sys.exit(1)
+args = parser.parse_args()
 
 # Get the total number of testers
 def numtesters(cachespec, testerspec):
@@ -125,9 +117,9 @@ def numtesters(cachespec, testerspec):
 
 block_size = 64
 
-# Start by parsing the command line options and do some basic sanity
+# Start by parsing the command-line arguments and do some basic sanity
 # checking
-if options.random:
+if args.random:
      # Generate a tree with a valid number of testers
      while True:
           tree_depth = random.randint(1, 4)
@@ -140,8 +132,8 @@ def numtesters(cachespec, testerspec):
          "-t", ':'.join(map(str, testerspec)))
 else:
      try:
-          cachespec = [int(x) for x in options.caches.split(':')]
-          testerspec = [int(x) for x in options.testers.split(':')]
+          cachespec = [int(x) for x in args.caches.split(':')]
+          testerspec = [int(x) for x in args.testers.split(':')]
      except:
           print("Error: Unable to parse caches or testers option")
           sys.exit(1)
@@ -179,7 +171,7 @@ def numtesters(cachespec, testerspec):
                  tgts_per_mshr = 8, clusivity = 'mostly_incl',
                  writeback_clean = True)
 
-if options.blocking:
+if args.blocking:
      proto_l1.mshrs = 1
 else:
      proto_l1.mshrs = 4
@@ -211,10 +203,10 @@ def numtesters(cachespec, testerspec):
      cache_proto.insert(0, next)
 
 # Make a prototype for the tester to be used throughout
-proto_tester = MemTest(max_loads = options.maxloads,
-                       percent_functional = options.functional,
-                       percent_uncacheable = options.uncacheable,
-                       progress_interval = options.progress)
+proto_tester = MemTest(max_loads = args.maxloads,
+                       percent_functional = args.functional,
+                       percent_uncacheable = args.uncacheable,
+                       progress_interval = args.progress)
 
 # Set up the system along with a simple memory and reference memory
 system = System(physmem = SimpleMemory(),
@@ -222,7 +214,7 @@ def numtesters(cachespec, testerspec):
 
 system.voltage_domain = VoltageDomain(voltage = '1V')
 
-system.clk_domain = SrcClockDomain(clock =  options.sys_clock,
+system.clk_domain = SrcClockDomain(clock =  args.sys_clock,
                         voltage_domain = system.voltage_domain)
 
 # For each level, track the next subsys index to use
@@ -300,7 +292,7 @@ def make_cache_level(ncaches, prototypes, level, next_cache):
 # controller
 last_subsys = getattr(system, 'l%dsubsys0' % len(cachespec))
 last_subsys.xbar.point_of_coherency = True
-if options.noncoherent_cache:
+if args.noncoherent_cache:
      system.llc = NoncoherentCache(size = '16MB', assoc = 16, tag_latency = 10,
                                    data_latency = 10, sequential_access = True,
                                    response_latency = 20, tgts_per_mshr = 8,
@@ -311,7 +303,7 @@ def make_cache_level(ncaches, prototypes, level, next_cache):
      last_subsys.xbar.master = system.physmem.port
 
 root = Root(full_system = False, system = system)
-if options.atomic:
+if args.atomic:
     root.system.mem_mode = 'atomic'
 else:
     root.system.mem_mode = 'timing'
@@ -324,6 +316,6 @@ def make_cache_level(ncaches, prototypes, level, next_cache):
 m5.instantiate()
 
 # Simulate until program terminates
-exit_event = m5.simulate(options.maxtick)
+exit_event = m5.simulate(args.maxtick)
 
 print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
diff --git a/configs/example/riscv/fs_linux.py b/configs/example/riscv/fs_linux.py
index 3c781ac78d..19520becca 100644
--- a/configs/example/riscv/fs_linux.py
+++ b/configs/example/riscv/fs_linux.py
@@ -38,7 +38,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import optparse
+import argparse
 import sys
 from os import path
 
@@ -101,7 +101,7 @@ def generateDtb(system):
     fdt.writeDtbFile(path.join(m5.options.outdir, 'device.dtb'))
 
 # ----------------------------- Add Options ---------------------------- #
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 Options.addFSOptions(parser)
 
@@ -110,23 +110,19 @@ def generateDtb(system):
     Ruby.define_options(parser)
 
 # ---------------------------- Parse Options --------------------------- #
-(options, args) = parser.parse_args()
-
-if args:
-    print("Error: script doesn't take any positional arguments")
-    sys.exit(1)
+args = parser.parse_args()
 
 # CPU and Memory
-(CPUClass, mem_mode, FutureClass) = Simulation.setCPUClass(options)
-MemClass = Simulation.setMemClass(options)
+(CPUClass, mem_mode, FutureClass) = Simulation.setCPUClass(args)
+MemClass = Simulation.setMemClass(args)
 
-np = options.num_cpus
+np = args.num_cpus
 
 # ---------------------------- Setup System ---------------------------- #
 # Edit this section to customize peripherals and system settings
 system = System()
-mdesc = SysConfig(disks=options.disk_image, rootdev=options.root_device,
-                        mem=options.mem_size, os_type=options.os_type)
+mdesc = SysConfig(disks=args.disk_image, rootdev=args.root_device,
+                        mem=args.mem_size, os_type=args.os_type)
 system.mem_mode = mem_mode
 system.mem_ranges = [AddrRange(start=0x80000000, size=mdesc.mem())]
 
@@ -166,63 +162,63 @@ def generateDtb(system):
 # ---------------------------- Default Setup --------------------------- #
 
 # Set the cache line size for the entire system
-system.cache_line_size = options.cacheline_size
+system.cache_line_size = args.cacheline_size
 
 # Create a top-level voltage domain
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 
 # Create a source clock for the system and set the clock period
-system.clk_domain = SrcClockDomain(clock =  options.sys_clock,
+system.clk_domain = SrcClockDomain(clock =  args.sys_clock,
         voltage_domain = system.voltage_domain)
 
 # Create a CPU voltage domain
 system.cpu_voltage_domain = VoltageDomain()
 
 # Create a source clock for the CPUs and set the clock period
-system.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
+system.cpu_clk_domain = SrcClockDomain(clock = args.cpu_clock,
                                             voltage_domain =
                                             system.cpu_voltage_domain)
 
-system.workload.object_file = options.kernel
+system.workload.object_file = args.kernel
 
 # NOTE: Not yet tested
-if options.script is not None:
-    system.readfile = options.script
+if args.script is not None:
+    system.readfile = args.script
 
-system.init_param = options.init_param
+system.init_param = args.init_param
 
 system.cpu = [CPUClass(clk_domain=system.cpu_clk_domain, cpu_id=i)
                 for i in range(np)]
 
-if options.caches or options.l2cache:
+if args.caches or args.l2cache:
     # By default the IOCache runs at the system clock
     system.iocache = IOCache(addr_ranges = system.mem_ranges)
     system.iocache.cpu_side = system.iobus.mem_side_ports
     system.iocache.mem_side = system.membus.cpu_side_ports
-elif not options.external_memory_system:
+elif not args.external_memory_system:
     system.iobridge = Bridge(delay='50ns', ranges = system.mem_ranges)
     system.iobridge.cpu_side_ports = system.iobus.mem_side_ports
     system.iobridge.mem_side_ports = system.membus.cpu_side_ports
 
 # Sanity check
-if options.simpoint_profile:
+if args.simpoint_profile:
     if not ObjectList.is_noncaching_cpu(CPUClass):
         fatal("SimPoint generation should be done with atomic cpu")
     if np > 1:
         fatal("SimPoint generation not supported with more than one CPUs")
 
 for i in range(np):
-    if options.simpoint_profile:
-        system.cpu[i].addSimPointProbe(options.simpoint_interval)
-    if options.checker:
+    if args.simpoint_profile:
+        system.cpu[i].addSimPointProbe(args.simpoint_interval)
+    if args.checker:
         system.cpu[i].addCheckerCpu()
     if not ObjectList.is_kvm_cpu(CPUClass):
-        if options.bp_type:
-            bpClass = ObjectList.bp_list.get(options.bp_type)
+        if args.bp_type:
+            bpClass = ObjectList.bp_list.get(args.bp_type)
             system.cpu[i].branchPred = bpClass()
-        if options.indirect_bp_type:
+        if args.indirect_bp_type:
             IndirectBPClass = ObjectList.indirect_bp_list.get(
-                options.indirect_bp_type)
+                args.indirect_bp_type)
             system.cpu[i].branchPred.indirectBranchPred = \
                 IndirectBPClass()
     system.cpu[i].createThreads()
@@ -257,15 +253,15 @@ def generateDtb(system):
 
 # ---------------------------- Default Setup --------------------------- #
 
-if options.elastic_trace_en and options.checkpoint_restore == None and \
-    not options.fast_forward:
-    CpuConfig.config_etrace(CPUClass, system.cpu, options)
+if args.elastic_trace_en and args.checkpoint_restore == None and \
+    not args.fast_forward:
+    CpuConfig.config_etrace(CPUClass, system.cpu, args)
 
-CacheConfig.config_cache(options, system)
+CacheConfig.config_cache(args, system)
 
-MemConfig.config_mem(options, system)
+MemConfig.config_mem(args, system)
 
 root = Root(full_system=True, system=system)
 
-Simulation.setWorkCountOptions(system, options)
-Simulation.run(options, root, system, FutureClass)
+Simulation.setWorkCountOptions(system, args)
+Simulation.run(args, root, system, FutureClass)
diff --git a/configs/example/ruby_direct_test.py b/configs/example/ruby_direct_test.py
index 163b38fede..cdf13d5920 100644
--- a/configs/example/ruby_direct_test.py
+++ b/configs/example/ruby_direct_test.py
@@ -29,7 +29,7 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys
 
 addToPath('../')
 
@@ -41,60 +41,56 @@
 config_root = os.path.dirname(config_path)
 m5_root = os.path.dirname(config_root)
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addNoISAOptions(parser)
 
-parser.add_option("--requests", metavar="N", default=100,
-                  help="Stop after N requests")
-parser.add_option("-f", "--wakeup_freq", metavar="N", default=10,
-                  help="Wakeup every N cycles")
-parser.add_option("--test-type", type="choice", default="SeriesGetx",
-                  choices = ["SeriesGetx", "SeriesGets", "SeriesGetMixed",
-                             "Invalidate"],
-                  help = "Type of test")
-parser.add_option("--percent-writes", type="int", default=100,
-                  help="percentage of accesses that should be writes")
+parser.add_argument("--requests", metavar="N", default=100,
+                    help="Stop after N requests")
+parser.add_argument("-f", "--wakeup_freq", metavar="N", default=10,
+                    help="Wakeup every N cycles")
+parser.add_argument("--test-type", default="SeriesGetx",
+                    choices = ["SeriesGetx", "SeriesGets", "SeriesGetMixed",
+                               "Invalidate"],
+                    help = "Type of test")
+parser.add_argument("--percent-writes", type=int, default=100,
+                    help="percentage of accesses that should be writes")
 
 #
-# Add the ruby specific and protocol specific options
+# Add the Ruby-specific and protocol-specific options
 #
 Ruby.define_options(parser)
-(options, args) = parser.parse_args()
-
-if args:
-     print("Error: script doesn't take any positional arguments")
-     sys.exit(1)
+args = parser.parse_args()
 
 #
 # Select the direct test generator
 #
-if options.test_type == "SeriesGetx":
-    generator = SeriesRequestGenerator(num_cpus = options.num_cpus,
+if args.test_type == "SeriesGetx":
+    generator = SeriesRequestGenerator(num_cpus = args.num_cpus,
                                        percent_writes = 100)
-elif options.test_type == "SeriesGets":
-    generator = SeriesRequestGenerator(num_cpus = options.num_cpus,
+elif args.test_type == "SeriesGets":
+    generator = SeriesRequestGenerator(num_cpus = args.num_cpus,
                                        percent_writes = 0)
-elif options.test_type == "SeriesGetMixed":
-    generator = SeriesRequestGenerator(num_cpus = options.num_cpus,
-                                       percent_writes = options.percent_writes)
-elif options.test_type == "Invalidate":
-    generator = InvalidateGenerator(num_cpus = options.num_cpus)
+elif args.test_type == "SeriesGetMixed":
+    generator = SeriesRequestGenerator(num_cpus = args.num_cpus,
+                                       percent_writes = args.percent_writes)
+elif args.test_type == "Invalidate":
+    generator = InvalidateGenerator(num_cpus = args.num_cpus)
 else:
     print("Error: unknown direct test generator")
     sys.exit(1)
 
 # Create the M5 system.
-system = System(mem_ranges = [AddrRange(options.mem_size)])
+system = System(mem_ranges = [AddrRange(args.mem_size)])
 
 
 # Create a top-level voltage domain and clock domain
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 
-system.clk_domain = SrcClockDomain(clock = options.sys_clock,
+system.clk_domain = SrcClockDomain(clock = args.sys_clock,
                                    voltage_domain = system.voltage_domain)
 
 # Create the ruby random tester
-system.cpu = RubyDirectedTester(requests_to_complete = options.requests,
+system.cpu = RubyDirectedTester(requests_to_complete = args.requests,
                                 generator = generator)
 
 # the ruby tester reuses num_cpus to specify the
@@ -102,14 +98,14 @@
 # is stored in system.cpu. because there is only ever one
 # tester object, num_cpus is not necessarily equal to the
 # size of system.cpu
-cpu_list = [ system.cpu ] * options.num_cpus
-Ruby.create_system(options, False, system, cpus=cpu_list)
+cpu_list = [ system.cpu ] * args.num_cpus
+Ruby.create_system(args, False, system, cpus=cpu_list)
 
 # Since Ruby runs at an independent frequency, create a seperate clock
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                         voltage_domain = system.voltage_domain)
 
-assert(options.num_cpus == len(system.ruby._cpu_ports))
+assert(args.num_cpus == len(system.ruby._cpu_ports))
 
 for ruby_port in system.ruby._cpu_ports:
     #
@@ -131,6 +127,6 @@
 m5.instantiate()
 
 # simulate until program terminates
-exit_event = m5.simulate(options.abs_max_tick)
+exit_event = m5.simulate(args.abs_max_tick)
 
 print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
diff --git a/configs/example/ruby_gpu_random_test.py b/configs/example/ruby_gpu_random_test.py
index 745ad0298b..75aac2eb8d 100644
--- a/configs/example/ruby_gpu_random_test.py
+++ b/configs/example/ruby_gpu_random_test.py
@@ -33,7 +33,7 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys
 
 addToPath('../')
 
@@ -43,99 +43,95 @@
 #
 # Add the ruby specific and protocol specific options
 #
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addNoISAOptions(parser)
 Ruby.define_options(parser)
 
 # GPU Ruby tester options
-parser.add_option("--cache-size", type="choice", default="small",
-                  choices=["small", "large"],
-                  help="Cache sizes to use. Small encourages races between \
+parser.add_argument("--cache-size", default="small",
+                    choices=["small", "large"],
+                    help="Cache sizes to use. Small encourages races between \
                         requests and writebacks. Large stresses write-through \
                         and/or write-back GPU caches.")
-parser.add_option("--system-size", type="choice", default="small",
-                  choices=["small", "medium", "large"],
-                  help="This option defines how many CUs, CPUs and cache \
+parser.add_argument("--system-size", default="small",
+                    choices=["small", "medium", "large"],
+                    help="This option defines how many CUs, CPUs and cache \
                         components in the test system.")
-parser.add_option("--address-range", type="choice", default="small",
-                  choices=["small", "large"],
-                  help="This option defines the number of atomic \
+parser.add_argument("--address-range", default="small",
+                    choices=["small", "large"],
+                    help="This option defines the number of atomic \
                         locations that affects the working set's size. \
                         A small number of atomic locations encourage more \
                         races among threads. The large option stresses cache \
                         resources.")
-parser.add_option("--episode-length", type="choice", default="short",
-                  choices=["short", "medium", "long"],
-                  help="This option defines the number of LDs and \
+parser.add_argument("--episode-length", default="short",
+                    choices=["short", "medium", "long"],
+                    help="This option defines the number of LDs and \
                         STs in an episode. The small option encourages races \
                         between the start and end of an episode. The long \
                         option encourages races between LDs and STs in the \
                         same episode.")
-parser.add_option("--test-length", type="int", default=1,
-                  help="The number of episodes to be executed by each \
+parser.add_argument("--test-length", type=int, default=1,
+                    help="The number of episodes to be executed by each \
                         wavefront. This determines the maximum number, i.e., \
                         val X #WFs, of episodes to be executed in the test.")
-parser.add_option("--debug-tester", action='store_true',
-                  help="This option will turn on DRF checker")
-parser.add_option("--random-seed", type="int", default=0,
-                  help="Random seed number. Default value (i.e., 0) means \
+parser.add_argument("--debug-tester", action='store_true',
+                    help="This option will turn on DRF checker")
+parser.add_argument("--random-seed", type=int, default=0,
+                    help="Random seed number. Default value (i.e., 0) means \
                         using runtime-specific value")
-parser.add_option("--log-file", type="string", default="gpu-ruby-test.log")
-parser.add_option("--num-dmas", type="int", default=0,
-                  help="The number of DMA engines to use in tester config.")
+parser.add_argument("--log-file", type=str, default="gpu-ruby-test.log")
+parser.add_argument("--num-dmas", type=int, default=0,
+                    help="The number of DMA engines to use in tester config.")
 
-(options, args) = parser.parse_args()
-
-if args:
-     print("Error: script doesn't take any positional arguments")
-     sys.exit(1)
+args = parser.parse_args()
 
 #
 # Set up cache size - 2 options
 #   0: small cache
 #   1: large cache
 #
-if (options.cache_size == "small"):
-    options.tcp_size="256B"
-    options.tcp_assoc=2
-    options.tcc_size="1kB"
-    options.tcc_assoc=2
-elif (options.cache_size == "large"):
-    options.tcp_size="256kB"
-    options.tcp_assoc=16
-    options.tcc_size="1024kB"
-    options.tcc_assoc=16
+if (args.cache_size == "small"):
+    args.tcp_size="256B"
+    args.tcp_assoc=2
+    args.tcc_size="1kB"
+    args.tcc_assoc=2
+elif (args.cache_size == "large"):
+    args.tcp_size="256kB"
+    args.tcp_assoc=16
+    args.tcc_size="1024kB"
+    args.tcc_assoc=16
 
 #
 # Set up system size - 3 options
 #
-if (options.system_size == "small"):
+if (args.system_size == "small"):
     # 1 CU, 1 CPU, 1 SQC, 1 Scalar
-    options.wf_size = 1
-    options.wavefronts_per_cu = 1
-    options.num_cpus = 1
-    options.num_dmas = 1
-    options.cu_per_sqc = 1
-    options.cu_per_scalar_cache = 1
-    options.num_compute_units = 1
-elif (options.system_size == "medium"):
+    args.wf_size = 1
+    args.wavefronts_per_cu = 1
+    args.num_cpus = 1
+    args.num_dmas = 1
+    args.cu_per_sqc = 1
+    args.cu_per_scalar_cache = 1
+    args.num_compute_units = 1
+elif (args.system_size == "medium"):
     # 4 CUs, 4 CPUs, 1 SQCs, 1 Scalars
-    options.wf_size = 16
-    options.wavefronts_per_cu = 4
-    options.num_cpus = 4
-    options.num_dmas = 2
-    options.cu_per_sqc = 4
-    options.cu_per_scalar_cache = 4
-    options.num_compute_units = 4
-elif (options.system_size == "large"):
+    args.wf_size = 16
+    args.wavefronts_per_cu = 4
+    args.num_cpus = 4
+    args.num_dmas = 2
+    args.cu_per_sqc = 4
+    args.cu_per_scalar_cache = 4
+    args.num_compute_units = 4
+elif (args.system_size == "large"):
     # 8 CUs, 4 CPUs, 1 SQCs, 1 Scalars
-    options.wf_size = 32
-    options.wavefronts_per_cu = 4
-    options.num_cpus = 4
-    options.num_dmas = 4
-    options.cu_per_sqc = 4
-    options.cu_per_scalar_cache = 4
-    options.num_compute_units = 8
+    args.wf_size = 32
+    args.wavefronts_per_cu = 4
+    args.num_cpus = 4
+    args.num_dmas = 4
+    args.cu_per_sqc = 4
+    args.cu_per_scalar_cache = 4
+    args.num_compute_units = 8
 
 #
 # Set address range - 2 options
@@ -143,11 +139,11 @@
 #   level 1: large
 # Each location corresponds to a 4-byte piece of data
 #
-options.mem_size = '1024MB'
-if (options.address_range == "small"):
+args.mem_size = '1024MB'
+if (args.address_range == "small"):
     num_atomic_locs = 10
     num_regular_locs_per_atomic_loc = 10000
-elif (options.address_range == "large"):
+elif (args.address_range == "large"):
     num_atomic_locs = 100
     num_regular_locs_per_atomic_loc = 100000
 
@@ -157,11 +153,11 @@
 #   1: 100 actions
 #   2: 500 actions
 #
-if (options.episode_length == "short"):
+if (args.episode_length == "short"):
     eps_length = 10
-elif (options.episode_length == "medium"):
+elif (args.episode_length == "medium"):
     eps_length = 100
-elif (options.episode_length == "long"):
+elif (args.episode_length == "long"):
     eps_length = 500
 
 #
@@ -173,47 +169,47 @@
 # to detect deadlock caused by Ruby protocol first before one caused by the
 # coalescer. Both units are in Ticks
 #
-options.cache_deadlock_threshold = 1e8
+args.cache_deadlock_threshold = 1e8
 tester_deadlock_threshold = 1e9
 
 # For now we're testing only GPU protocol, so we force num_cpus to be 0
-options.num_cpus = 0
+args.num_cpus = 0
 
 # Number of DMA engines
-n_DMAs = options.num_dmas
+n_DMAs = args.num_dmas
 
 # Number of CUs
-n_CUs = options.num_compute_units
+n_CUs = args.num_compute_units
 
 # Set test length, i.e., number of episodes per wavefront * #WFs.
 # Test length can be 1x#WFs, 10x#WFs, 100x#WFs, ...
-n_WFs = n_CUs * options.wavefronts_per_cu
-max_episodes = options.test_length * n_WFs
+n_WFs = n_CUs * args.wavefronts_per_cu
+max_episodes = args.test_length * n_WFs
 
 # Number of SQC and Scalar caches
-assert(n_CUs % options.cu_per_sqc == 0)
-n_SQCs = n_CUs // options.cu_per_sqc
-options.num_sqc = n_SQCs
+assert(n_CUs % args.cu_per_sqc == 0)
+n_SQCs = n_CUs // args.cu_per_sqc
+args.num_sqc = n_SQCs
 
-assert(options.cu_per_scalar_cache != 0)
-n_Scalars = n_CUs // options.cu_per_scalar_cache
-options.num_scalar_cache = n_Scalars
+assert(args.cu_per_scalar_cache != 0)
+n_Scalars = n_CUs // args.cu_per_scalar_cache
+args.num_scalar_cache = n_Scalars
 
 #
 # Create GPU Ruby random tester
 #
-tester = ProtocolTester(cus_per_sqc = options.cu_per_sqc,
-                        cus_per_scalar = options.cu_per_scalar_cache,
-                        wavefronts_per_cu = options.wavefronts_per_cu,
-                        workitems_per_wavefront = options.wf_size,
+tester = ProtocolTester(cus_per_sqc = args.cu_per_sqc,
+                        cus_per_scalar = args.cu_per_scalar_cache,
+                        wavefronts_per_cu = args.wavefronts_per_cu,
+                        workitems_per_wavefront = args.wf_size,
                         num_atomic_locations = num_atomic_locs,
                         num_normal_locs_per_atomic = \
                                           num_regular_locs_per_atomic_loc,
                         max_num_episodes = max_episodes,
                         episode_length = eps_length,
-                        debug_tester = options.debug_tester,
-                        random_seed = options.random_seed,
-                        log_file = options.log_file)
+                        debug_tester = args.debug_tester,
+                        random_seed = args.random_seed,
+                        log_file = args.log_file)
 
 #
 # Create a gem5 system. Note that the memory object isn't actually used by the
@@ -222,12 +218,12 @@
 # has physical ports to be connected to Ruby
 #
 system = System(cpu = tester,
-                mem_ranges = [AddrRange(options.mem_size)],
-                cache_line_size = options.cacheline_size,
+                mem_ranges = [AddrRange(args.mem_size)],
+                cache_line_size = args.cacheline_size,
                 mem_mode = 'timing')
 
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
-system.clk_domain = SrcClockDomain(clock = options.sys_clock,
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
+system.clk_domain = SrcClockDomain(clock = args.sys_clock,
                                    voltage_domain = system.voltage_domain)
 
 #
@@ -235,7 +231,7 @@
 # kernels. Setting it to zero disables the VIPER protocol from creating
 # a command processor and its caches.
 #
-options.num_cp = 0
+args.num_cp = 0
 
 #
 # Make generic DMA sequencer for Ruby to use
@@ -254,8 +250,8 @@
 # is stored in system.cpu. because there is only ever one
 # tester object, num_cpus is not necessarily equal to the
 # size of system.cpu
-cpu_list = [ system.cpu ] * options.num_cpus
-Ruby.create_system(options = options, full_system = False,
+cpu_list = [ system.cpu ] * args.num_cpus
+Ruby.create_system(args, full_system = False,
                    system = system, dma_ports = system.dma_devices,
                    cpus = cpu_list)
 
@@ -340,7 +336,7 @@
     for wf_idx in range(tester.wavefronts_per_cu):
         wavefronts.append(GpuWavefront(thread_id = g_thread_idx,
                                          cu_id = cu_idx,
-                                         num_lanes = options.wf_size,
+                                         num_lanes = args.wf_size,
                                          clk_domain = thread_clock,
                                          deadlock_threshold = \
                                                 tester_deadlock_threshold))
diff --git a/configs/example/ruby_mem_test.py b/configs/example/ruby_mem_test.py
index cf47a60e9e..9face88b0a 100644
--- a/configs/example/ruby_mem_test.py
+++ b/configs/example/ruby_mem_test.py
@@ -29,7 +29,7 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys
 
 addToPath('../')
 
@@ -40,74 +40,70 @@
 config_path = os.path.dirname(os.path.abspath(__file__))
 config_root = os.path.dirname(config_path)
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser(
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 Options.addNoISAOptions(parser)
 
-parser.add_option("--maxloads", metavar="N", default=0,
-                  help="Stop after N loads")
-parser.add_option("--progress", type="int", default=1000,
-                  metavar="NLOADS",
-                  help="Progress message interval "
-                  "[default: %default]")
-parser.add_option("--num-dmas", type="int", default=0, help="# of dma testers")
-parser.add_option("--functional", type="int", default=0,
-                  help="percentage of accesses that should be functional")
-parser.add_option("--suppress-func-errors", action="store_true",
-                  help="suppress panic when functional accesses fail")
+parser.add_argument("--maxloads", metavar="N", default=0,
+                    help="Stop after N loads")
+parser.add_argument("--progress", type=int, default=1000,
+                    metavar="NLOADS",
+                    help="Progress message interval ")
+parser.add_argument("--num-dmas", type=int, default=0, help="# of dma testers")
+parser.add_argument("--functional", type=int, default=0,
+                    help="percentage of accesses that should be functional")
+parser.add_argument("--suppress-func-errors", action="store_true",
+                    help="suppress panic when functional accesses fail")
 
 #
 # Add the ruby specific and protocol specific options
 #
 Ruby.define_options(parser)
 
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
-
-if args:
-     print("Error: script doesn't take any positional arguments")
-     sys.exit(1)
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2
 
 block_size = 64
 
-if options.num_cpus > block_size:
+if args.num_cpus > block_size:
      print("Error: Number of testers %d limited to %d because of false sharing"
-           % (options.num_cpus, block_size))
+           % (args.num_cpus, block_size))
      sys.exit(1)
 
 #
 # Currently ruby does not support atomic or uncacheable accesses
 #
-cpus = [ MemTest(max_loads = options.maxloads,
-                 percent_functional = options.functional,
+cpus = [ MemTest(max_loads = args.maxloads,
+                 percent_functional = args.functional,
                  percent_uncacheable = 0,
-                 progress_interval = options.progress,
-                 suppress_func_errors = options.suppress_func_errors) \
-         for i in range(options.num_cpus) ]
+                 progress_interval = args.progress,
+                 suppress_func_errors = args.suppress_func_errors) \
+         for i in range(args.num_cpus) ]
 
 system = System(cpu = cpus,
-                clk_domain = SrcClockDomain(clock = options.sys_clock),
-                mem_ranges = [AddrRange(options.mem_size)])
+                clk_domain = SrcClockDomain(clock = args.sys_clock),
+                mem_ranges = [AddrRange(args.mem_size)])
 
-if options.num_dmas > 0:
-    dmas = [ MemTest(max_loads = options.maxloads,
+if args.num_dmas > 0:
+    dmas = [ MemTest(max_loads = args.maxloads,
                      percent_functional = 0,
                      percent_uncacheable = 0,
-                     progress_interval = options.progress,
+                     progress_interval = args.progress,
                      suppress_func_errors =
-                                        not options.suppress_func_errors) \
-             for i in range(options.num_dmas) ]
+                                        not args.suppress_func_errors) \
+             for i in range(args.num_dmas) ]
     system.dma_devices = dmas
 else:
     dmas = []
@@ -115,14 +111,14 @@
 dma_ports = []
 for (i, dma) in enumerate(dmas):
     dma_ports.append(dma.test)
-Ruby.create_system(options, False, system, dma_ports = dma_ports)
+Ruby.create_system(args, False, system, dma_ports = dma_ports)
 
 # Create a top-level voltage domain and clock domain
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
-system.clk_domain = SrcClockDomain(clock = options.sys_clock,
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
+system.clk_domain = SrcClockDomain(clock = args.sys_clock,
                                    voltage_domain = system.voltage_domain)
 # Create a seperate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                         voltage_domain = system.voltage_domain)
 
 #
@@ -159,6 +155,6 @@
 m5.instantiate()
 
 # simulate until program terminates
-exit_event = m5.simulate(options.abs_max_tick)
+exit_event = m5.simulate(args.abs_max_tick)
 
 print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
diff --git a/configs/example/ruby_random_test.py b/configs/example/ruby_random_test.py
index da824295dc..26c933cff8 100644
--- a/configs/example/ruby_random_test.py
+++ b/configs/example/ruby_random_test.py
@@ -29,7 +29,7 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys
 
 addToPath('../')
 
@@ -41,13 +41,13 @@
 config_root = os.path.dirname(config_path)
 m5_root = os.path.dirname(config_root)
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addNoISAOptions(parser)
 
-parser.add_option("--maxloads", metavar="N", default=100,
-                  help="Stop after N loads")
-parser.add_option("-f", "--wakeup_freq", metavar="N", default=10,
-                  help="Wakeup every N cycles")
+parser.add_argument("--maxloads", metavar="N", default=100,
+                    help="Stop after N loads")
+parser.add_argument("-f", "--wakeup_freq", metavar="N", default=10,
+                    help="Wakeup every N cycles")
 
 #
 # Add the ruby specific and protocol specific options
@@ -58,24 +58,20 @@
     open(os.path.join(config_root, "common", "Options.py")).read(), \
     os.path.join(config_root, "common", "Options.py"), 'exec'))
 
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
-
-if args:
-     print("Error: script doesn't take any positional arguments")
-     sys.exit(1)
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2
 
 #
 # Create the ruby random tester
@@ -87,20 +83,20 @@
     check_flush = True
 
 tester = RubyTester(check_flush = check_flush,
-                    checks_to_complete = options.maxloads,
-                    wakeup_frequency = options.wakeup_freq)
+                    checks_to_complete = args.maxloads,
+                    wakeup_frequency = args.wakeup_freq)
 
 #
 # Create the M5 system.  Note that the Memory Object isn't
 # actually used by the rubytester, but is included to support the
 # M5 memory size == Ruby memory size checks
 #
-system = System(cpu = tester, mem_ranges = [AddrRange(options.mem_size)])
+system = System(cpu = tester, mem_ranges = [AddrRange(args.mem_size)])
 
 # Create a top-level voltage domain and clock domain
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 
-system.clk_domain = SrcClockDomain(clock = options.sys_clock,
+system.clk_domain = SrcClockDomain(clock = args.sys_clock,
                                    voltage_domain = system.voltage_domain)
 
 # the ruby tester reuses num_cpus to specify the
@@ -108,14 +104,14 @@
 # is stored in system.cpu. because there is only ever one
 # tester object, num_cpus is not necessarily equal to the
 # size of system.cpu
-cpu_list = [ system.cpu ] * options.num_cpus
-Ruby.create_system(options, False, system, cpus=cpu_list)
+cpu_list = [ system.cpu ] * args.num_cpus
+Ruby.create_system(args, False, system, cpus=cpu_list)
 
 # Create a seperate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                         voltage_domain = system.voltage_domain)
 
-assert(options.num_cpus == len(system.ruby._cpu_ports))
+assert(args.num_cpus == len(system.ruby._cpu_ports))
 
 tester.num_cpus = len(system.ruby._cpu_ports)
 
@@ -159,6 +155,6 @@
 m5.instantiate()
 
 # simulate until program terminates
-exit_event = m5.simulate(options.abs_max_tick)
+exit_event = m5.simulate(args.abs_max_tick)
 
 print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
diff --git a/configs/example/se.py b/configs/example/se.py
index c552e57d3b..891dd72ee1 100644
--- a/configs/example/se.py
+++ b/configs/example/se.py
@@ -40,7 +40,7 @@
 #
 # "m5 test.py"
 
-import optparse
+import argparse
 import sys
 import os
 
@@ -64,8 +64,8 @@
 from common.Caches import *
 from common.cpu2000 import *
 
-def get_processes(options):
-    """Interprets provided options and returns a list of processes"""
+def get_processes(args):
+    """Interprets provided args; returns (process list, thread count)"""
 
     multiprocesses = []
     inputs = []
@@ -73,15 +73,15 @@ def get_processes(options):
     errouts = []
     pargs = []
 
-    workloads = options.cmd.split(';')
-    if options.input != "":
-        inputs = options.input.split(';')
-    if options.output != "":
-        outputs = options.output.split(';')
-    if options.errout != "":
-        errouts = options.errout.split(';')
-    if options.options != "":
-        pargs = options.options.split(';')
+    workloads = args.cmd.split(';')
+    if args.input != "":
+        inputs = args.input.split(';')
+    if args.output != "":
+        outputs = args.output.split(';')
+    if args.errout != "":
+        errouts = args.errout.split(';')
+    if args.options != "":
+        pargs = args.options.split(';')
 
     idx = 0
     for wrkld in workloads:
@@ -89,8 +89,8 @@ def get_processes(options):
         process.executable = wrkld
         process.cwd = os.getcwd()
 
-        if options.env:
-            with open(options.env, 'r') as f:
+        if args.env:
+            with open(args.env, 'r') as f:
                 process.env = [line.rstrip() for line in f]
 
         if len(pargs) > idx:
@@ -108,32 +108,28 @@ def get_processes(options):
         multiprocesses.append(process)
         idx += 1
 
-    if options.smt:
-        assert(options.cpu_type == "DerivO3CPU")
+    if args.smt:
+        assert(args.cpu_type == "DerivO3CPU")
         return multiprocesses, idx
     else:
         return multiprocesses, 1
 
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 Options.addSEOptions(parser)
 
 if '--ruby' in sys.argv:
     Ruby.define_options(parser)
 
-(options, args) = parser.parse_args()
-
-if args:
-    print("Error: script doesn't take any positional arguments")
-    sys.exit(1)
+args = parser.parse_args()
 
 multiprocesses = []
 numThreads = 1
 
-if options.bench:
-    apps = options.bench.split("-")
-    if len(apps) != options.num_cpus:
+if args.bench:
+    apps = args.bench.split("-")
+    if len(apps) != args.num_cpus:
         print("number of benchmarks not equal to set num_cpus!")
         sys.exit(1)
 
@@ -141,60 +137,60 @@ def get_processes(options):
         try:
             if buildEnv['TARGET_ISA'] == 'arm':
                 exec("workload = %s('arm_%s', 'linux', '%s')" % (
-                        app, options.arm_iset, options.spec_input))
+                        app, args.arm_iset, args.spec_input))
             else:
                 exec("workload = %s(buildEnv['TARGET_ISA', 'linux', '%s')" % (
-                        app, options.spec_input))
+                        app, args.spec_input))
             multiprocesses.append(workload.makeProcess())
         except:
             print("Unable to find workload for %s: %s" %
                   (buildEnv['TARGET_ISA'], app),
                   file=sys.stderr)
             sys.exit(1)
-elif options.cmd:
-    multiprocesses, numThreads = get_processes(options)
+elif args.cmd:
+    multiprocesses, numThreads = get_processes(args)
 else:
     print("No workload specified. Exiting!\n", file=sys.stderr)
     sys.exit(1)
 
 
-(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)
+(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
 CPUClass.numThreads = numThreads
 
 # Check -- do not allow SMT with multiple CPUs
-if options.smt and options.num_cpus > 1:
+if args.smt and args.num_cpus > 1:
     fatal("You cannot use SMT with multiple CPUs!")
 
-np = options.num_cpus
+np = args.num_cpus
 mp0_path = multiprocesses[0].executable
 system = System(cpu = [CPUClass(cpu_id=i) for i in range(np)],
                 mem_mode = test_mem_mode,
-                mem_ranges = [AddrRange(options.mem_size)],
-                cache_line_size = options.cacheline_size,
+                mem_ranges = [AddrRange(args.mem_size)],
+                cache_line_size = args.cacheline_size,
                 workload = SEWorkload.init_compatible(mp0_path))
 
 if numThreads > 1:
     system.multi_thread = True
 
 # Create a top-level voltage domain
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 
 # Create a source clock for the system and set the clock period
-system.clk_domain = SrcClockDomain(clock =  options.sys_clock,
+system.clk_domain = SrcClockDomain(clock =  args.sys_clock,
                                    voltage_domain = system.voltage_domain)
 
 # Create a CPU voltage domain
 system.cpu_voltage_domain = VoltageDomain()
 
 # Create a separate clock domain for the CPUs
-system.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
+system.cpu_clk_domain = SrcClockDomain(clock = args.cpu_clock,
                                        voltage_domain =
                                        system.cpu_voltage_domain)
 
 # If elastic tracing is enabled, then configure the cpu and attach the elastic
 # trace probe
-if options.elastic_trace_en:
-    CpuConfig.config_etrace(CPUClass, system.cpu, options)
+if args.elastic_trace_en:
+    CpuConfig.config_etrace(CPUClass, system.cpu, args)
 
 # All cpus belong to a common cpu_clk_domain, therefore running at a common
 # frequency.
@@ -211,42 +207,42 @@ def get_processes(options):
         fatal("KvmCPU can only be used in SE mode with x86")
 
 # Sanity check
-if options.simpoint_profile:
+if args.simpoint_profile:
     if not ObjectList.is_noncaching_cpu(CPUClass):
         fatal("SimPoint/BPProbe should be done with an atomic cpu")
     if np > 1:
         fatal("SimPoint generation not supported with more than one CPUs")
 
 for i in range(np):
-    if options.smt:
+    if args.smt:
         system.cpu[i].workload = multiprocesses
     elif len(multiprocesses) == 1:
         system.cpu[i].workload = multiprocesses[0]
     else:
         system.cpu[i].workload = multiprocesses[i]
 
-    if options.simpoint_profile:
-        system.cpu[i].addSimPointProbe(options.simpoint_interval)
+    if args.simpoint_profile:
+        system.cpu[i].addSimPointProbe(args.simpoint_interval)
 
-    if options.checker:
+    if args.checker:
         system.cpu[i].addCheckerCpu()
 
-    if options.bp_type:
-        bpClass = ObjectList.bp_list.get(options.bp_type)
+    if args.bp_type:
+        bpClass = ObjectList.bp_list.get(args.bp_type)
         system.cpu[i].branchPred = bpClass()
 
-    if options.indirect_bp_type:
+    if args.indirect_bp_type:
         indirectBPClass = \
-            ObjectList.indirect_bp_list.get(options.indirect_bp_type)
+            ObjectList.indirect_bp_list.get(args.indirect_bp_type)
         system.cpu[i].branchPred.indirectBranchPred = indirectBPClass()
 
     system.cpu[i].createThreads()
 
-if options.ruby:
-    Ruby.create_system(options, False, system)
-    assert(options.num_cpus == len(system.ruby._cpu_ports))
+if args.ruby:
+    Ruby.create_system(args, False, system)
+    assert(args.num_cpus == len(system.ruby._cpu_ports))
 
-    system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+    system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                         voltage_domain = system.voltage_domain)
     for i in range(np):
         ruby_port = system.ruby._cpu_ports[i]
@@ -259,16 +255,16 @@ def get_processes(options):
         # Connect the cpu's cache ports to Ruby
         ruby_port.connectCpuPorts(system.cpu[i])
 else:
-    MemClass = Simulation.setMemClass(options)
+    MemClass = Simulation.setMemClass(args)
     system.membus = SystemXBar()
     system.system_port = system.membus.slave
-    CacheConfig.config_cache(options, system)
-    MemConfig.config_mem(options, system)
-    config_filesystem(system, options)
+    CacheConfig.config_cache(args, system)
+    MemConfig.config_mem(args, system)
+    config_filesystem(system, args)
 
-if options.wait_gdb:
+if args.wait_gdb:
     for cpu in system.cpu:
         cpu.wait_for_remote_gdb = True
 
 root = Root(full_system = False, system = system)
-Simulation.run(options, root, system, FutureClass)
+Simulation.run(args, root, system, FutureClass)
diff --git a/configs/learning_gem5/part1/two_level.py b/configs/learning_gem5/part1/two_level.py
index 0c0faddaab..4e7e900e07 100644
--- a/configs/learning_gem5/part1/two_level.py
+++ b/configs/learning_gem5/part1/two_level.py
@@ -52,28 +52,20 @@
 # import the SimpleOpts module
 from common import SimpleOpts
 
-# Set the usage message to display
-SimpleOpts.set_usage("usage: %prog [options] <binary to execute>")
-
-# Finalize the arguments and grab the opts so we can pass it on to our objects
-(opts, args) = SimpleOpts.parse_args()
-
 # get ISA for the default binary to run. This is mostly for simple testing
 isa = str(m5.defines.buildEnv['TARGET_ISA']).lower()
 
 # Default to running 'hello', use the compiled ISA to find the binary
 # grab the specific path to the binary
 thispath = os.path.dirname(os.path.realpath(__file__))
-binary = os.path.join(thispath, '../../../',
-                      'tests/test-progs/hello/bin/', isa, 'linux/hello')
+default_binary = os.path.join(thispath, '../../../',
+    'tests/test-progs/hello/bin/', isa, 'linux/hello')
+
+# Binary to execute
+SimpleOpts.add_option("binary", nargs='?', default=default_binary)
 
-# Check if there was a binary passed in via the command line and error if
-# there are too many arguments
-if len(args) == 1:
-    binary = args[0]
-elif len(args) > 1:
-    SimpleOpts.print_help()
-    m5.fatal("Expected a binary to execute as positional argument")
+# Finalize the arguments and grab the args so we can pass them to our objects
+args = SimpleOpts.parse_args()
 
 # create the system we are going to simulate
 system = System()
@@ -91,8 +83,8 @@
 system.cpu = TimingSimpleCPU()
 
 # Create an L1 instruction and data cache
-system.cpu.icache = L1ICache(opts)
-system.cpu.dcache = L1DCache(opts)
+system.cpu.icache = L1ICache(args)
+system.cpu.dcache = L1DCache(args)
 
 # Connect the instruction and data caches to the CPU
 system.cpu.icache.connectCPU(system.cpu)
@@ -106,7 +98,7 @@
 system.cpu.dcache.connectBus(system.l2bus)
 
 # Create an L2 cache and connect it to the l2bus
-system.l2cache = L2Cache(opts)
+system.l2cache = L2Cache(args)
 system.l2cache.connectCPUSideBus(system.l2bus)
 
 # Create a memory bus
@@ -134,13 +126,13 @@
 system.mem_ctrl.dram.range = system.mem_ranges[0]
 system.mem_ctrl.port = system.membus.master
 
-system.workload = SEWorkload.init_compatible(binary)
+system.workload = SEWorkload.init_compatible(args.binary)
 
 # Create a process for a simple "Hello World" application
 process = Process()
 # Set the command
 # cmd is a list which begins with the executable (like argv)
-process.cmd = [binary]
+process.cmd = [args.binary]
 # Set the cpu to use the process as its workload and create thread contexts
 system.cpu.workload = process
 system.cpu.createThreads()
diff --git a/configs/network/Network.py b/configs/network/Network.py
index 869091298d..91f00766e5 100644
--- a/configs/network/Network.py
+++ b/configs/network/Network.py
@@ -34,44 +34,52 @@ def define_options(parser):
     # By default, ruby uses the simple timing cpu
     parser.set_defaults(cpu_type="TimingSimpleCPU")
 
-    parser.add_option("--topology", type="string", default="Crossbar",
-                      help="check configs/topologies for complete set")
-    parser.add_option("--mesh-rows", type="int", default=0,
-                      help="the number of rows in the mesh topology")
-    parser.add_option("--network", type="choice", default="simple",
-                      choices=['simple', 'garnet'],
-                      help="""'simple'|'garnet' (garnet2.0 will be
-                      deprecated.)""")
-    parser.add_option("--router-latency", action="store", type="int",
-                      default=1,
-                      help="""number of pipeline stages in the garnet router.
-                            Has to be >= 1.
-                            Can be over-ridden on a per router basis
-                            in the topology file.""")
-    parser.add_option("--link-latency", action="store", type="int", default=1,
-                      help="""latency of each link the simple/garnet networks.
-                            Has to be >= 1.
-                            Can be over-ridden on a per link basis
-                            in the topology file.""")
-    parser.add_option("--link-width-bits", action="store", type="int",
-                      default=128,
-                      help="width in bits for all links inside garnet.")
-    parser.add_option("--vcs-per-vnet", action="store", type="int", default=4,
-                      help="""number of virtual channels per virtual network
-                            inside garnet network.""")
-    parser.add_option("--routing-algorithm", action="store", type="int",
-                      default=0,
-                      help="""routing algorithm in network.
-                            0: weight-based table
-                            1: XY (for Mesh. see garnet/RoutingUnit.cc)
-                            2: Custom (see garnet/RoutingUnit.cc""")
-    parser.add_option("--network-fault-model", action="store_true",
-                      default=False,
-                      help="""enable network fault model:
-                            see src/mem/ruby/network/fault_model/""")
-    parser.add_option("--garnet-deadlock-threshold", action="store",
-                      type="int", default=50000,
-                      help="network-level deadlock threshold.")
+    parser.add_argument(
+        "--topology", type=str, default="Crossbar",
+        help="check configs/topologies for complete set")
+    parser.add_argument(
+        "--mesh-rows", type=int, default=0,
+        help="the number of rows in the mesh topology")
+    parser.add_argument(
+        "--network", default="simple",
+        choices=['simple', 'garnet'],
+        help="""'simple'|'garnet' (garnet2.0 will be deprecated.)""")
+    parser.add_argument(
+        "--router-latency", action="store", type=int,
+        default=1,
+        help="""number of pipeline stages in the garnet router.
+            Has to be >= 1.
+            Can be over-ridden on a per router basis
+            in the topology file.""")
+    parser.add_argument(
+        "--link-latency", action="store", type=int, default=1,
+        help="""latency of each link the simple/garnet networks.
+        Has to be >= 1. Can be over-ridden on a per link basis
+        in the topology file.""")
+    parser.add_argument(
+        "--link-width-bits", action="store", type=int,
+        default=128,
+        help="width in bits for all links inside garnet.")
+    parser.add_argument(
+        "--vcs-per-vnet", action="store", type=int, default=4,
+        help="""number of virtual channels per virtual network
+            inside garnet network.""")
+    parser.add_argument(
+        "--routing-algorithm", action="store", type=int,
+        default=0,
+        help="""routing algorithm in network.
+            0: weight-based table
+            1: XY (for Mesh. see garnet/RoutingUnit.cc)
+            2: Custom (see garnet/RoutingUnit.cc""")
+    parser.add_argument(
+        "--network-fault-model", action="store_true",
+        default=False,
+        help="""enable network fault model:
+            see src/mem/ruby/network/fault_model/""")
+    parser.add_argument(
+        "--garnet-deadlock-threshold", action="store",
+        type=int, default=50000,
+        help="network-level deadlock threshold.")
 
 def create_network(options, ruby):
 
diff --git a/configs/nvm/sweep.py b/configs/nvm/sweep.py
index 7ae8ded233..152c391c6e 100644
--- a/configs/nvm/sweep.py
+++ b/configs/nvm/sweep.py
@@ -34,7 +34,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import math
-import optparse
+import argparse
 
 import m5
 from m5.objects import *
@@ -51,36 +51,32 @@
 # and the sequential stride size (how many bytes per activate), and
 # observe what bus utilisation (bandwidth) is achieved
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 
 nvm_generators = {
     "NVM" : lambda x: x.createNvm,
 }
 
 # Use a single-channel DDR3-1600 x64 (8x8 topology) by default
-parser.add_option("--nvm-type", type="choice", default="NVM_2400_1x64",
-                  choices=ObjectList.mem_list.get_names(),
-                  help = "type of memory to use")
+parser.add_argument("--nvm-type", default="NVM_2400_1x64",
+                    choices=ObjectList.mem_list.get_names(),
+                    help = "type of memory to use")
 
-parser.add_option("--nvm-ranks", "-r", type="int", default=1,
-                  help = "Number of ranks to iterate across")
+parser.add_argument("--nvm-ranks", "-r", type=int, default=1,
+                    help = "Number of ranks to iterate across")
 
-parser.add_option("--rd_perc", type="int", default=100,
-                  help = "Percentage of read commands")
+parser.add_argument("--rd_perc", type=int, default=100,
+                    help = "Percentage of read commands")
 
-parser.add_option("--mode", type="choice", default="NVM",
-                  choices=nvm_generators.keys(),
-                  help = "NVM: Random traffic")
+parser.add_argument("--mode", default="NVM",
+                    choices=nvm_generators.keys(),
+                    help = "NVM: Random traffic")
 
-parser.add_option("--addr-map", type="choice",
-                  choices=ObjectList.dram_addr_map_list.get_names(),
-                  default="RoRaBaCoCh", help = "NVM address map policy")
+parser.add_argument("--addr-map",
+                    choices=ObjectList.dram_addr_map_list.get_names(),
+                    default="RoRaBaCoCh", help = "NVM address map policy")
 
-(options, args) = parser.parse_args()
-
-if args:
-    print("Error: script doesn't take any positional arguments")
-    sys.exit(1)
+args = parser.parse_args()
 
 # at the moment we stay with the default open-adaptive page policy,
 # and address mapping
@@ -102,9 +98,9 @@
 
 # force a single channel to match the assumptions in the DRAM traffic
 # generator
-options.mem_channels = 1
-options.external_memory_system = 0
-MemConfig.config_mem(options, system)
+args.mem_channels = 1
+args.external_memory_system = 0
+MemConfig.config_mem(args, system)
 
 # the following assumes that we are using the native memory
 # controller with an NVM interface, check to be sure
@@ -117,7 +113,7 @@
 system.mem_ctrls[0].nvm.null = True
 
 # Set the address mapping based on input argument
-system.mem_ctrls[0].nvm.addr_mapping = options.addr_map
+system.mem_ctrls[0].nvm.addr_mapping = args.addr_map
 
 # stay in each state for 0.25 ms, long enough to warm things up, and
 # short enough to avoid hitting a refresh
@@ -174,16 +170,16 @@
 m5.instantiate()
 
 def trace():
-    addr_map = ObjectList.dram_addr_map_list.get(options.addr_map)
-    generator = nvm_generators[options.mode](system.tgen)
+    addr_map = ObjectList.dram_addr_map_list.get(args.addr_map)
+    generator = nvm_generators[args.mode](system.tgen)
     for stride_size in range(burst_size, max_stride + 1, burst_size):
         for bank in range(1, nbr_banks + 1):
             num_seq_pkts = int(math.ceil(float(stride_size) / burst_size))
             yield generator(period,
                             0, max_addr, burst_size, int(itt), int(itt),
-                            options.rd_perc, 0,
+                            args.rd_perc, 0,
                             num_seq_pkts, buffer_size, nbr_banks, bank,
-                            addr_map, options.nvm_ranks)
+                            addr_map, args.nvm_ranks)
     yield system.tgen.createExit(0)
 
 system.tgen.start(trace())
diff --git a/configs/nvm/sweep_hybrid.py b/configs/nvm/sweep_hybrid.py
index d2f51dd7de..3e7111d4c5 100644
--- a/configs/nvm/sweep_hybrid.py
+++ b/configs/nvm/sweep_hybrid.py
@@ -34,7 +34,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import math
-import optparse
+import argparse
 
 import m5
 from m5.objects import *
@@ -51,46 +51,42 @@
 # and the sequential stride size (how many bytes per activate), and
 # observe what bus utilisation (bandwidth) is achieved
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 
 hybrid_generators = {
     "HYBRID" : lambda x: x.createHybrid,
 }
 
 # Use a single-channel DDR3-1600 x64 (8x8 topology) by default
-parser.add_option("--nvm-type", type="choice", default="NVM_2400_1x64",
-                  choices=ObjectList.mem_list.get_names(),
-                  help = "type of memory to use")
+parser.add_argument("--nvm-type", default="NVM_2400_1x64",
+                    choices=ObjectList.mem_list.get_names(),
+                    help = "type of memory to use")
 
-parser.add_option("--mem-type", type="choice", default="DDR4_2400_16x4",
-                  choices=ObjectList.mem_list.get_names(),
-                  help = "type of memory to use")
+parser.add_argument("--mem-type", default="DDR4_2400_16x4",
+                    choices=ObjectList.mem_list.get_names(),
+                    help = "type of memory to use")
 
-parser.add_option("--nvm-ranks", "-n", type="int", default=1,
-                  help = "Number of ranks to iterate across")
+parser.add_argument("--nvm-ranks", "-n", type=int, default=1,
+                    help = "Number of ranks to iterate across")
 
-parser.add_option("--mem-ranks", "-r", type="int", default=2,
-                  help = "Number of ranks to iterate across")
+parser.add_argument("--mem-ranks", "-r", type=int, default=2,
+                    help = "Number of ranks to iterate across")
 
-parser.add_option("--rd-perc", type="int", default=100,
-                  help = "Percentage of read commands")
+parser.add_argument("--rd-perc", type=int, default=100,
+                    help = "Percentage of read commands")
 
-parser.add_option("--nvm-perc", type="int", default=100,
-                  help = "Percentage of NVM commands")
+parser.add_argument("--nvm-perc", type=int, default=100,
+                    help = "Percentage of NVM commands")
 
-parser.add_option("--mode", type="choice", default="HYBRID",
-                  choices=hybrid_generators.keys(),
-                  help = "Hybrid: Random DRAM + NVM traffic")
+parser.add_argument("--mode", default="HYBRID",
+                    choices=hybrid_generators.keys(),
+                    help = "Hybrid: Random DRAM + NVM traffic")
 
-parser.add_option("--addr-map", type="choice",
-                  choices=ObjectList.dram_addr_map_list.get_names(),
-                  default="RoRaBaCoCh", help = "NVM address map policy")
+parser.add_argument("--addr-map",
+                    choices=ObjectList.dram_addr_map_list.get_names(),
+                    default="RoRaBaCoCh", help = "NVM address map policy")
 
-(options, args) = parser.parse_args()
-
-if args:
-    print("Error: script doesn't take any positional arguments")
-    sys.exit(1)
+args = parser.parse_args()
 
 # at the moment we stay with the default open-adaptive page policy,
 # and address mapping
@@ -114,10 +110,10 @@
 
 # force a single channel to match the assumptions in the DRAM traffic
 # generator
-options.mem_channels = 1
-options.external_memory_system = 0
-options.hybrid_channel = True
-MemConfig.config_mem(options, system)
+args.mem_channels = 1
+args.external_memory_system = 0
+args.hybrid_channel = True
+MemConfig.config_mem(args, system)
 
 # the following assumes that we are using the native controller
 # with NVM and DRAM interfaces, check to be sure
@@ -133,8 +129,8 @@
 system.mem_ctrls[0].nvm.null = True
 
 # Set the address mapping based on input argument
-system.mem_ctrls[0].dram.addr_mapping = options.addr_map
-system.mem_ctrls[0].nvm.addr_mapping = options.addr_map
+system.mem_ctrls[0].dram.addr_mapping = args.addr_map
+system.mem_ctrls[0].nvm.addr_mapping = args.addr_map
 
 # stay in each state for 0.25 ms, long enough to warm things up, and
 # short enough to avoid hitting a refresh
@@ -208,8 +204,8 @@
 m5.instantiate()
 
 def trace():
-    addr_map = ObjectList.dram_addr_map_list.get(options.addr_map)
-    generator = hybrid_generators[options.mode](system.tgen)
+    addr_map = ObjectList.dram_addr_map_list.get(args.addr_map)
+    generator = hybrid_generators[args.mode](system.tgen)
     for stride_size in range(burst_size, max_stride + 1, burst_size):
         num_seq_pkts_dram = int(math.ceil(float(stride_size) /
                                           burst_size_dram))
@@ -218,13 +214,13 @@ def trace():
                         0, max_addr_dram, burst_size_dram,
                         min_addr_nvm, max_addr_nvm, burst_size_nvm,
                         int(itt), int(itt),
-                        options.rd_perc, 0,
+                        args.rd_perc, 0,
                         num_seq_pkts_dram, page_size_dram,
                         nbr_banks_dram, nbr_banks_dram,
                         num_seq_pkts_nvm, buffer_size_nvm,
                         nbr_banks_nvm, nbr_banks_nvm,
-                        addr_map, options.mem_ranks,
-                        options.nvm_ranks, options.nvm_perc)
+                        addr_map, args.mem_ranks,
+                        args.nvm_ranks, args.nvm_perc)
 
     yield system.tgen.createExit(0)
 
diff --git a/configs/ruby/AMD_Base_Constructor.py b/configs/ruby/AMD_Base_Constructor.py
index 6f13c1e0f2..cd4733ba0b 100644
--- a/configs/ruby/AMD_Base_Constructor.py
+++ b/configs/ruby/AMD_Base_Constructor.py
@@ -99,7 +99,7 @@ def create(self, options, ruby_system, system):
             self.recycle_latency = options.recycle_latency
 
 def define_options(parser):
-    parser.add_option("--cpu-to-dir-latency", type="int", default=15)
+    parser.add_argument("--cpu-to-dir-latency", type=int, default=15)
 
 def construct(options, system, ruby_system):
     if (buildEnv['PROTOCOL'] != 'GPU_VIPER' or
diff --git a/configs/ruby/CHI.py b/configs/ruby/CHI.py
index a48a100a00..e4a2477491 100644
--- a/configs/ruby/CHI.py
+++ b/configs/ruby/CHI.py
@@ -39,9 +39,9 @@
 from .Ruby import create_topology
 
 def define_options(parser):
-    parser.add_option("--chi-config", action="store", type="string",
-                      default=None,
-                      help="NoC config. parameters and bindings. "
+    parser.add_argument("--chi-config", action="store", type=str,
+                        default=None,
+                        help="NoC config. parameters and bindings. "
                            "Required for CustomMesh topology")
 
 def read_config_file(file):
diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py
index 9841213129..01ec602c5e 100644
--- a/configs/ruby/GPU_VIPER.py
+++ b/configs/ruby/GPU_VIPER.py
@@ -345,54 +345,56 @@ def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
         self.probeToL3 = probe_to_l3
         self.respToL3 = resp_to_l3
 
+
 def define_options(parser):
-    parser.add_option("--num-subcaches", type = "int", default = 4)
-    parser.add_option("--l3-data-latency", type = "int", default = 20)
-    parser.add_option("--l3-tag-latency", type = "int", default = 15)
-    parser.add_option("--cpu-to-dir-latency", type = "int", default = 120)
-    parser.add_option("--gpu-to-dir-latency", type = "int", default = 120)
-    parser.add_option("--no-resource-stalls", action = "store_false",
-                      default = True)
-    parser.add_option("--no-tcc-resource-stalls", action = "store_false",
-                      default = True)
-    parser.add_option("--use-L3-on-WT", action = "store_true", default = False)
-    parser.add_option("--num-tbes", type = "int", default = 256)
-    parser.add_option("--l2-latency", type = "int", default = 50)  # load to use
-    parser.add_option("--num-tccs", type = "int", default = 1,
-                      help = "number of TCC banks in the GPU")
-    parser.add_option("--sqc-size", type = 'string', default = '32kB',
-                      help = "SQC cache size")
-    parser.add_option("--sqc-assoc", type = 'int', default = 8,
-                      help = "SQC cache assoc")
-    parser.add_option("--sqc-deadlock-threshold", type='int',
-                      help="Set the SQC deadlock threshold to some value")
-
-    parser.add_option("--WB_L1", action = "store_true", default = False,
-                      help = "writeback L1")
-    parser.add_option("--WB_L2", action = "store_true", default = False,
-                      help = "writeback L2")
-    parser.add_option("--TCP_latency", type = "int", default = 4,
-                      help = "TCP latency")
-    parser.add_option("--TCC_latency", type = "int", default = 16,
-                      help = "TCC latency")
-    parser.add_option("--tcc-size", type = 'string', default = '256kB',
-                      help = "agregate tcc size")
-    parser.add_option("--tcc-assoc", type = 'int', default = 16,
-                      help = "tcc assoc")
-    parser.add_option("--tcp-size", type = 'string', default = '16kB',
-                      help = "tcp size")
-    parser.add_option("--tcp-assoc", type = 'int', default = 16,
-                      help = "tcp assoc")
-    parser.add_option("--tcp-deadlock-threshold", type='int',
-                      help="Set the TCP deadlock threshold to some value")
-    parser.add_option("--max-coalesces-per-cycle", type="int", default=1,
-                      help="Maximum insts that may coalesce in a cycle");
-
-    parser.add_option("--noL1", action = "store_true", default = False,
-                      help = "bypassL1")
-    parser.add_option("--scalar-buffer-size", type = 'int', default = 128,
-                      help="Size of the mandatory queue in the GPU scalar "
-                      "cache controller")
+    parser.add_argument("--num-subcaches", type=int, default=4)
+    parser.add_argument("--l3-data-latency", type=int, default=20)
+    parser.add_argument("--l3-tag-latency", type=int, default=15)
+    parser.add_argument("--cpu-to-dir-latency", type=int, default=120)
+    parser.add_argument("--gpu-to-dir-latency", type=int, default=120)
+    parser.add_argument("--no-resource-stalls", action="store_false",
+                        default=True)
+    parser.add_argument("--no-tcc-resource-stalls", action="store_false",
+                        default=True)
+    parser.add_argument("--use-L3-on-WT", action="store_true", default=False)
+    parser.add_argument("--num-tbes", type=int, default=256)
+    parser.add_argument("--l2-latency", type=int, default=50)  # load to use
+    parser.add_argument("--num-tccs", type=int, default=1,
+                        help="number of TCC banks in the GPU")
+    parser.add_argument("--sqc-size", type=str, default='32kB',
+                        help="SQC cache size")
+    parser.add_argument("--sqc-assoc", type=int, default=8,
+                        help="SQC cache assoc")
+    parser.add_argument("--sqc-deadlock-threshold", type=int,
+                        help="Set the SQC deadlock threshold to some value")
+
+    parser.add_argument("--WB_L1", action="store_true", default=False,
+                        help="writeback L1")
+    parser.add_argument("--WB_L2", action="store_true", default=False,
+                        help="writeback L2")
+    parser.add_argument("--TCP_latency", type=int, default=4,
+                        help="TCP latency")
+    parser.add_argument("--TCC_latency", type=int, default=16,
+                        help="TCC latency")
+    parser.add_argument("--tcc-size", type=str, default='256kB',
+                        help="agregate tcc size")
+    parser.add_argument("--tcc-assoc", type=int, default=16,
+                        help="tcc assoc")
+    parser.add_argument("--tcp-size", type=str, default='16kB',
+                        help="tcp size")
+    parser.add_argument("--tcp-assoc", type=int, default=16,
+                        help="tcp assoc")
+    parser.add_argument("--tcp-deadlock-threshold", type=int,
+                        help="Set the TCP deadlock threshold to some value")
+    parser.add_argument("--max-coalesces-per-cycle", type=int, default=1,
+                        help="Maximum insts that may coalesce in a cycle")
+
+    parser.add_argument("--noL1", action="store_true", default=False,
+                        help="bypassL1")
+    parser.add_argument("--scalar-buffer-size", type=int, default=128,
+                        help="Size of the mandatory queue in the GPU scalar "
+                        "cache controller")
+
 
 def create_system(options, full_system, system, dma_devices, bootmem,
                   ruby_system, cpus):
diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py
index 860023b4d8..4088ecd3e7 100644
--- a/configs/ruby/MESI_Three_Level.py
+++ b/configs/ruby/MESI_Three_Level.py
@@ -43,18 +43,20 @@ class L1Cache(RubyCache): pass
 class L2Cache(RubyCache): pass
 
 def define_options(parser):
-    parser.add_option("--num-clusters", type = "int", default = 1,
-            help = "number of clusters in a design in which there are shared\
+    parser.add_argument(
+        "--num-clusters", type=int, default=1,
+        help="number of clusters in a design in which there are shared\
             caches private to clusters")
-    parser.add_option("--l0i_size", type="string", default="4096B")
-    parser.add_option("--l0d_size", type="string", default="4096B")
-    parser.add_option("--l0i_assoc", type="int", default=1)
-    parser.add_option("--l0d_assoc", type="int", default=1)
-    parser.add_option("--l0_transitions_per_cycle", type="int", default=32)
-    parser.add_option("--l1_transitions_per_cycle", type="int", default=32)
-    parser.add_option("--l2_transitions_per_cycle", type="int", default=4)
-    parser.add_option("--enable-prefetch", action="store_true", default=False,\
-                        help="Enable Ruby hardware prefetcher")
+    parser.add_argument("--l0i_size", type=str, default="4096B")
+    parser.add_argument("--l0d_size", type=str, default="4096B")
+    parser.add_argument("--l0i_assoc", type=int, default=1)
+    parser.add_argument("--l0d_assoc", type=int, default=1)
+    parser.add_argument("--l0_transitions_per_cycle", type=int, default=32)
+    parser.add_argument("--l1_transitions_per_cycle", type=int, default=32)
+    parser.add_argument("--l2_transitions_per_cycle", type=int, default=4)
+    parser.add_argument(
+        "--enable-prefetch", action="store_true", default=False,
+        help="Enable Ruby hardware prefetcher")
     return
 
 def create_system(options, full_system, system, dma_ports, bootmem,
diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py
index 946b6412b1..7a900c1eef 100644
--- a/configs/ruby/MESI_Three_Level_HTM.py
+++ b/configs/ruby/MESI_Three_Level_HTM.py
@@ -43,18 +43,19 @@ class L1Cache(RubyCache): pass
 class L2Cache(RubyCache): pass
 
 def define_options(parser):
-    parser.add_option("--num-clusters", type = "int", default = 1,
-            help = "number of clusters in a design in which there are shared\
-            caches private to clusters")
-    parser.add_option("--l0i_size", type="string", default="4096B")
-    parser.add_option("--l0d_size", type="string", default="4096B")
-    parser.add_option("--l0i_assoc", type="int", default=1)
-    parser.add_option("--l0d_assoc", type="int", default=1)
-    parser.add_option("--l0_transitions_per_cycle", type="int", default=32)
-    parser.add_option("--l1_transitions_per_cycle", type="int", default=32)
-    parser.add_option("--l2_transitions_per_cycle", type="int", default=4)
-    parser.add_option("--enable-prefetch", action="store_true", default=False,\
-                        help="Enable Ruby hardware prefetcher")
+    parser.add_argument("--num-clusters", type=int, default=1,
+        help = "number of clusters in a design in which there are shared\
+        caches private to clusters")
+    parser.add_argument("--l0i_size", type=str, default="4096B")
+    parser.add_argument("--l0d_size", type=str, default="4096B")
+    parser.add_argument("--l0i_assoc", type=int, default=1)
+    parser.add_argument("--l0d_assoc", type=int, default=1)
+    parser.add_argument("--l0_transitions_per_cycle", type=int, default=32)
+    parser.add_argument("--l1_transitions_per_cycle", type=int, default=32)
+    parser.add_argument("--l2_transitions_per_cycle", type=int, default=4)
+    parser.add_argument(
+        "--enable-prefetch", action="store_true", default=False,
+        help="Enable Ruby hardware prefetcher")
     return
 
 def create_system(options, full_system, system, dma_ports, bootmem,
diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py
index eb008ea580..6de466bef4 100644
--- a/configs/ruby/MOESI_AMD_Base.py
+++ b/configs/ruby/MOESI_AMD_Base.py
@@ -197,14 +197,14 @@ def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
         self.respToL3 = resp_to_l3
 
 def define_options(parser):
-    parser.add_option("--num-subcaches", type="int", default=4)
-    parser.add_option("--l3-data-latency", type="int", default=20)
-    parser.add_option("--l3-tag-latency", type="int", default=15)
-    parser.add_option("--cpu-to-dir-latency", type="int", default=15)
-    parser.add_option("--no-resource-stalls", action="store_false",
-                      default=True)
-    parser.add_option("--num-tbes", type="int", default=256)
-    parser.add_option("--l2-latency", type="int", default=50) # load to use
+    parser.add_argument("--num-subcaches", type=int, default=4)
+    parser.add_argument("--l3-data-latency", type=int, default=20)
+    parser.add_argument("--l3-tag-latency", type=int, default=15)
+    parser.add_argument("--cpu-to-dir-latency", type=int, default=15)
+    parser.add_argument("--no-resource-stalls", action="store_false",
+                        default=True)
+    parser.add_argument("--num-tbes", type=int, default=256)
+    parser.add_argument("--l2-latency", type=int, default=50) # load to use
 
 def create_system(options, full_system, system, dma_devices, bootmem,
                   ruby_system):
diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py
index 91d02a0147..925bfd363f 100644
--- a/configs/ruby/MOESI_CMP_token.py
+++ b/configs/ruby/MOESI_CMP_token.py
@@ -39,14 +39,18 @@ class L1Cache(RubyCache): pass
 class L2Cache(RubyCache): pass
 
 def define_options(parser):
-    parser.add_option("--l1-retries", type="int", default=1,
-                      help="Token_CMP: # of l1 retries before going persistent")
-    parser.add_option("--timeout-latency", type="int", default=300,
-                      help="Token_CMP: cycles until issuing again");
-    parser.add_option("--disable-dyn-timeouts", action="store_true",
-          help="Token_CMP: disable dyanimc timeouts, use fixed latency instead")
-    parser.add_option("--allow-atomic-migration", action="store_true",
-          help="allow migratory sharing for atomic only accessed blocks")
+    parser.add_argument(
+        "--l1-retries", type=int, default=1,
+        help="Token_CMP: # of l1 retries before going persistent")
+    parser.add_argument(
+        "--timeout-latency", type=int, default=300,
+        help="Token_CMP: cycles until issuing again");
+    parser.add_argument(
+        "--disable-dyn-timeouts", action="store_true",
+        help="Token_CMP: disable dyanimc timeouts, use fixed latency instead")
+    parser.add_argument(
+        "--allow-atomic-migration", action="store_true",
+        help="allow migratory sharing for atomic only accessed blocks")
 
 def create_system(options, full_system, system, dma_ports, bootmem,
                   ruby_system, cpus):
diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py
index adff1c9a38..a120597cc3 100644
--- a/configs/ruby/MOESI_hammer.py
+++ b/configs/ruby/MOESI_hammer.py
@@ -44,11 +44,11 @@ class L2Cache(RubyCache): pass
 class ProbeFilter(RubyCache): pass
 
 def define_options(parser):
-    parser.add_option("--allow-atomic-migration", action="store_true",
+    parser.add_argument("--allow-atomic-migration", action="store_true",
           help="allow migratory sharing for atomic only accessed blocks")
-    parser.add_option("--pf-on", action="store_true",
+    parser.add_argument("--pf-on", action="store_true",
           help="Hammer: enable Probe Filter")
-    parser.add_option("--dir-on", action="store_true",
+    parser.add_argument("--dir-on", action="store_true",
           help="Hammer: enable Full-bit Directory")
 
 def create_system(options, full_system, system, dma_ports, bootmem,
diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py
index e7609f42d4..79c039f07b 100644
--- a/configs/ruby/Ruby.py
+++ b/configs/ruby/Ruby.py
@@ -56,36 +56,43 @@ def define_options(parser):
     # By default, ruby uses the simple timing cpu
     parser.set_defaults(cpu_type="TimingSimpleCPU")
 
-    parser.add_option("--ruby-clock", action="store", type="string",
-                      default='2GHz',
-                      help="Clock for blocks running at Ruby system's speed")
+    parser.add_argument(
+        "--ruby-clock", action="store", type=str,
+        default='2GHz',
+        help="Clock for blocks running at Ruby system's speed")
 
-    parser.add_option("--access-backing-store", action="store_true", default=False,
-                      help="Should ruby maintain a second copy of memory")
+    parser.add_argument(
+        "--access-backing-store", action="store_true", default=False,
+        help="Should ruby maintain a second copy of memory")
 
     # Options related to cache structure
-    parser.add_option("--ports", action="store", type="int", default=4,
-                      help="used of transitions per cycle which is a proxy \
-                            for the number of ports.")
+    parser.add_argument(
+        "--ports", action="store", type=int, default=4,
+        help="used of transitions per cycle which is a proxy \
+            for the number of ports.")
 
     # network options are in network/Network.py
 
     # ruby mapping options
-    parser.add_option("--numa-high-bit", type="int", default=0,
-                      help="high order address bit to use for numa mapping. " \
-                           "0 = highest bit, not specified = lowest bit")
-    parser.add_option("--interleaving-bits", type="int", default=0,
-                      help="number of bits to specify interleaving " \
-                           "in directory, memory controllers and caches. "
-                           "0 = not specified")
-    parser.add_option("--xor-low-bit", type="int", default=20,
-                      help="hashing bit for channel selection" \
-                           "see MemConfig for explanation of the default"\
-                           "parameter. If set to 0, xor_high_bit is also"\
-                           "set to 0.")
-
-    parser.add_option("--recycle-latency", type="int", default=10,
-                      help="Recycle latency for ruby controller input buffers")
+    parser.add_argument(
+        "--numa-high-bit", type=int, default=0,
+        help="high order address bit to use for numa mapping. "
+        "0 = highest bit, not specified = lowest bit")
+    parser.add_argument(
+        "--interleaving-bits", type=int, default=0,
+        help="number of bits to specify interleaving " \
+           "in directory, memory controllers and caches. "
+           "0 = not specified")
+    parser.add_argument(
+        "--xor-low-bit", type=int, default=20,
+        help="hashing bit for channel selection. " \
+           "See MemConfig for explanation of the default " \
+           "parameter. If set to 0, xor_high_bit is also " \
+           "set to 0.")
+
+    parser.add_argument(
+        "--recycle-latency", type=int, default=10,
+        help="Recycle latency for ruby controller input buffers")
 
     protocol = buildEnv['PROTOCOL']
     exec("from . import %s" % protocol)
diff --git a/configs/splash2/cluster.py b/configs/splash2/cluster.py
index b5e77cd1f3..84719d8d9c 100644
--- a/configs/splash2/cluster.py
+++ b/configs/splash2/cluster.py
@@ -29,7 +29,7 @@
 # "m5 test.py"
 
 import os
-import optparse
+import argparse
 import sys
 
 import m5
@@ -39,94 +39,90 @@
 # Define Command Line Options
 # ====================
 
-parser = optparse.OptionParser()
-
-parser.add_option("-d", "--detailed", action="store_true")
-parser.add_option("-t", "--timing", action="store_true")
-parser.add_option("-m", "--maxtick", type="int")
-parser.add_option("-c", "--numclusters",
-                  help="Number of clusters", type="int")
-parser.add_option("-n", "--numcpus",
-                  help="Number of cpus in total", type="int")
-parser.add_option("-f", "--frequency",
-                  default = "1GHz",
-                  help="Frequency of each CPU")
-parser.add_option("--l1size",
-                  default = "32kB")
-parser.add_option("--l1latency",
-                  default = 1)
-parser.add_option("--l2size",
-                  default = "256kB")
-parser.add_option("--l2latency",
-                  default = 10)
-parser.add_option("--rootdir",
-                  help="ROot directory of Splash2",
-                  default="/dist/splash2/codes/")
-parser.add_option("-b", "--benchmark",
-                  help="Splash 2 benchmark to run")
-
-(options, args) = parser.parse_args()
-
-if args:
-    print("Error: script doesn't take any positional arguments")
-    sys.exit(1)
+parser = argparse.ArgumentParser()
+
+parser.add_argument("-d", "--detailed", action="store_true")
+parser.add_argument("-t", "--timing", action="store_true")
+parser.add_argument("-m", "--maxtick", type=int)
+parser.add_argument("-c", "--numclusters",
+                    help="Number of clusters", type=int)
+parser.add_argument("-n", "--numcpus",
+                    help="Number of cpus in total", type=int)
+parser.add_argument("-f", "--frequency",
+                    default = "1GHz",
+                    help="Frequency of each CPU")
+parser.add_argument("--l1size",
+                    default = "32kB")
+parser.add_argument("--l1latency",
+                    default = 1)
+parser.add_argument("--l2size",
+                    default = "256kB")
+parser.add_argument("--l2latency",
+                    default = 10)
+parser.add_argument("--rootdir",
+                    help="Root directory of Splash2",
+                    default="/dist/splash2/codes/")
+parser.add_argument("-b", "--benchmark",
+                    help="Splash 2 benchmark to run")
+
+args = parser.parse_args()
 
 # --------------------
 # Define Splash2 Benchmarks
 # ====================
 class Cholesky(Process):
-        executable = options.rootdir + '/kernels/cholesky/CHOLESKY'
-        cmd = 'CHOLESKY -p' + str(options.numcpus) + ' '\
-             + options.rootdir + '/kernels/cholesky/inputs/tk23.O'
+        executable = args.rootdir + '/kernels/cholesky/CHOLESKY'
+        cmd = 'CHOLESKY -p' + str(args.numcpus) + ' '\
+             + args.rootdir + '/kernels/cholesky/inputs/tk23.O'
 
 class FFT(Process):
-        executable = options.rootdir + 'kernels/fft/FFT'
-        cmd = 'FFT -p' + str(options.numcpus) + ' -m18'
+        executable = args.rootdir + 'kernels/fft/FFT'
+        cmd = 'FFT -p' + str(args.numcpus) + ' -m18'
 
 class LU_contig(Process):
-        executable = options.rootdir + 'kernels/lu/contiguous_blocks/LU'
-        cmd = 'LU -p' + str(options.numcpus)
+        executable = args.rootdir + 'kernels/lu/contiguous_blocks/LU'
+        cmd = 'LU -p' + str(args.numcpus)
 
 class LU_noncontig(Process):
-        executable = options.rootdir + 'kernels/lu/non_contiguous_blocks/LU'
-        cmd = 'LU -p' + str(options.numcpus)
+        executable = args.rootdir + 'kernels/lu/non_contiguous_blocks/LU'
+        cmd = 'LU -p' + str(args.numcpus)
 
 class Radix(Process):
-        executable = options.rootdir + 'kernels/radix/RADIX'
-        cmd = 'RADIX -n524288 -p' + str(options.numcpus)
+        executable = args.rootdir + 'kernels/radix/RADIX'
+        cmd = 'RADIX -n524288 -p' + str(args.numcpus)
 
 class Barnes(Process):
-        executable = options.rootdir + 'apps/barnes/BARNES'
+        executable = args.rootdir + 'apps/barnes/BARNES'
         cmd = 'BARNES'
-        input = options.rootdir + 'apps/barnes/input.p' + str(options.numcpus)
+        input = args.rootdir + 'apps/barnes/input.p' + str(args.numcpus)
 
 class FMM(Process):
-        executable = options.rootdir + 'apps/fmm/FMM'
+        executable = args.rootdir + 'apps/fmm/FMM'
         cmd = 'FMM'
-        input = options.rootdir + 'apps/fmm/inputs/input.2048.p' + str(options.numcpus)
+        input = args.rootdir + 'apps/fmm/inputs/input.2048.p' + str(args.numcpus)
 
 class Ocean_contig(Process):
-        executable = options.rootdir + 'apps/ocean/contiguous_partitions/OCEAN'
-        cmd = 'OCEAN -p' + str(options.numcpus)
+        executable = args.rootdir + 'apps/ocean/contiguous_partitions/OCEAN'
+        cmd = 'OCEAN -p' + str(args.numcpus)
 
 class Ocean_noncontig(Process):
-        executable = options.rootdir + 'apps/ocean/non_contiguous_partitions/OCEAN'
-        cmd = 'OCEAN -p' + str(options.numcpus)
+        executable = args.rootdir + 'apps/ocean/non_contiguous_partitions/OCEAN'
+        cmd = 'OCEAN -p' + str(args.numcpus)
 
 class Raytrace(Process):
-        executable = options.rootdir + 'apps/raytrace/RAYTRACE'
-        cmd = 'RAYTRACE -p' + str(options.numcpus) + ' ' \
-             + options.rootdir + 'apps/raytrace/inputs/teapot.env'
+        executable = args.rootdir + 'apps/raytrace/RAYTRACE'
+        cmd = 'RAYTRACE -p' + str(args.numcpus) + ' ' \
+             + args.rootdir + 'apps/raytrace/inputs/teapot.env'
 
 class Water_nsquared(Process):
-        executable = options.rootdir + 'apps/water-nsquared/WATER-NSQUARED'
+        executable = args.rootdir + 'apps/water-nsquared/WATER-NSQUARED'
         cmd = 'WATER-NSQUARED'
-        input = options.rootdir + 'apps/water-nsquared/input.p' + str(options.numcpus)
+        input = args.rootdir + 'apps/water-nsquared/input.p' + str(args.numcpus)
 
 class Water_spatial(Process):
-        executable = options.rootdir + 'apps/water-spatial/WATER-SPATIAL'
+        executable = args.rootdir + 'apps/water-spatial/WATER-SPATIAL'
         cmd = 'WATER-SPATIAL'
-        input = options.rootdir + 'apps/water-spatial/input.p' + str(options.numcpus)
+        input = args.rootdir + 'apps/water-spatial/input.p' + str(args.numcpus)
 
 
 # --------------------
@@ -134,7 +130,7 @@ class Water_spatial(Process):
 # ====================
 
 class L1(Cache):
-    latency = options.l1latency
+    latency = args.l1latency
     mshrs = 12
     tgts_per_mshr = 8
 
@@ -143,7 +139,7 @@ class L1(Cache):
 # ----------------------
 
 class L2(Cache):
-    latency = options.l2latency
+    latency = args.l2latency
     mshrs = 92
     tgts_per_mshr = 16
     write_buffers = 8
@@ -154,52 +150,52 @@ class L2(Cache):
 class Cluster:
     pass
 
-cpusPerCluster = options.numcpus/options.numclusters
+cpusPerCluster = args.numcpus // args.numclusters
 
-busFrequency = Frequency(options.frequency)
+busFrequency = Frequency(args.frequency)
 busFrequency *= cpusPerCluster
 
 all_cpus = []
 all_l1s = []
 all_l1buses = []
-if options.timing:
-    clusters = [ Cluster() for i in range(options.numclusters)]
-    for j in range(options.numclusters):
+if args.timing:
+    clusters = [ Cluster() for i in range(args.numclusters)]
+    for j in range(args.numclusters):
         clusters[j].id = j
     for cluster in clusters:
         cluster.clusterbus = L2XBar(clock=busFrequency)
         all_l1buses += [cluster.clusterbus]
         cluster.cpus = [TimingSimpleCPU(cpu_id = i + cluster.id,
-                                        clock=options.frequency)
+                                        clock=args.frequency)
                         for i in range(cpusPerCluster)]
         all_cpus += cluster.cpus
-        cluster.l1 = L1(size=options.l1size, assoc = 4)
+        cluster.l1 = L1(size=args.l1size, assoc = 4)
         all_l1s += [cluster.l1]
-elif options.detailed:
-    clusters = [ Cluster() for i in range(options.numclusters)]
-    for j in range(options.numclusters):
+elif args.detailed:
+    clusters = [ Cluster() for i in range(args.numclusters)]
+    for j in range(args.numclusters):
         clusters[j].id = j
     for cluster in clusters:
         cluster.clusterbus = L2XBar(clock=busFrequency)
         all_l1buses += [cluster.clusterbus]
         cluster.cpus = [DerivO3CPU(cpu_id = i + cluster.id,
-                                   clock=options.frequency)
+                                   clock=args.frequency)
                         for i in range(cpusPerCluster)]
         all_cpus += cluster.cpus
-        cluster.l1 = L1(size=options.l1size, assoc = 4)
+        cluster.l1 = L1(size=args.l1size, assoc = 4)
         all_l1s += [cluster.l1]
 else:
-    clusters = [ Cluster() for i in range(options.numclusters)]
-    for j in range(options.numclusters):
+    clusters = [ Cluster() for i in range(args.numclusters)]
+    for j in range(args.numclusters):
         clusters[j].id = j
     for cluster in clusters:
         cluster.clusterbus = L2XBar(clock=busFrequency)
         all_l1buses += [cluster.clusterbus]
         cluster.cpus = [AtomicSimpleCPU(cpu_id = i + cluster.id,
-                                        clock=options.frequency)
+                                        clock=args.frequency)
                         for i in range(cpusPerCluster)]
         all_cpus += cluster.cpus
-        cluster.l1 = L1(size=options.l1size, assoc = 4)
+        cluster.l1 = L1(size=args.l1size, assoc = 4)
         all_l1s += [cluster.l1]
 
 # ----------------------
@@ -211,7 +207,7 @@ class Cluster:
 system.clock = '1GHz'
 
 system.toL2bus = L2XBar(clock = busFrequency)
-system.l2 = L2(size = options.l2size, assoc = 8)
+system.l2 = L2(size = args.l2size, assoc = 8)
 
 # ----------------------
 # Connect the L2 cache and memory together
@@ -240,29 +236,29 @@ class Cluster:
 # --------------------
 # Pick the correct Splash2 Benchmarks
 # ====================
-if options.benchmark == 'Cholesky':
+if args.benchmark == 'Cholesky':
     root.workload = Cholesky()
-elif options.benchmark == 'FFT':
+elif args.benchmark == 'FFT':
     root.workload = FFT()
-elif options.benchmark == 'LUContig':
+elif args.benchmark == 'LUContig':
     root.workload = LU_contig()
-elif options.benchmark == 'LUNoncontig':
+elif args.benchmark == 'LUNoncontig':
     root.workload = LU_noncontig()
-elif options.benchmark == 'Radix':
+elif args.benchmark == 'Radix':
     root.workload = Radix()
-elif options.benchmark == 'Barnes':
+elif args.benchmark == 'Barnes':
     root.workload = Barnes()
-elif options.benchmark == 'FMM':
+elif args.benchmark == 'FMM':
     root.workload = FMM()
-elif options.benchmark == 'OceanContig':
+elif args.benchmark == 'OceanContig':
     root.workload = Ocean_contig()
-elif options.benchmark == 'OceanNoncontig':
+elif args.benchmark == 'OceanNoncontig':
     root.workload = Ocean_noncontig()
-elif options.benchmark == 'Raytrace':
+elif args.benchmark == 'Raytrace':
     root.workload = Raytrace()
-elif options.benchmark == 'WaterNSquared':
+elif args.benchmark == 'WaterNSquared':
     root.workload = Water_nsquared()
-elif options.benchmark == 'WaterSpatial':
+elif args.benchmark == 'WaterSpatial':
     root.workload = Water_spatial()
 else:
     m5.util.panic("""
@@ -285,15 +281,15 @@ class Cluster:
 # Run the simulation
 # ----------------------
 
-if options.timing or options.detailed:
+if args.timing or args.detailed:
     root.system.mem_mode = 'timing'
 
 # instantiate configuration
 m5.instantiate()
 
 # simulate until program terminates
-if options.maxtick:
-    exit_event = m5.simulate(options.maxtick)
+if args.maxtick:
+    exit_event = m5.simulate(args.maxtick)
 else:
     exit_event = m5.simulate(m5.MaxTick)
 
diff --git a/configs/splash2/run.py b/configs/splash2/run.py
index 38fdbc89be..d90c779903 100644
--- a/configs/splash2/run.py
+++ b/configs/splash2/run.py
@@ -28,7 +28,7 @@
 #
 
 import os
-import optparse
+import argparse
 import sys
 
 import m5
@@ -38,37 +38,33 @@
 # Define Command Line Options
 # ====================
 
-parser = optparse.OptionParser()
-
-parser.add_option("-d", "--detailed", action="store_true")
-parser.add_option("-t", "--timing", action="store_true")
-parser.add_option("-m", "--maxtick", type="int")
-parser.add_option("-n", "--numcpus",
-                  help="Number of cpus in total", type="int")
-parser.add_option("-f", "--frequency",
-                  default = "1GHz",
-                  help="Frequency of each CPU")
-parser.add_option("--l1size",
-                  default = "32kB")
-parser.add_option("--l1latency",
-                  default = "1ns")
-parser.add_option("--l2size",
-                  default = "256kB")
-parser.add_option("--l2latency",
-                  default = "10ns")
-parser.add_option("--rootdir",
-                  help="Root directory of Splash2",
-                  default="/dist/splash2/codes")
-parser.add_option("-b", "--benchmark",
-                  help="Splash 2 benchmark to run")
-
-(options, args) = parser.parse_args()
-
-if args:
-    print("Error: script doesn't take any positional arguments")
-    sys.exit(1)
-
-if not options.numcpus:
+parser = argparse.ArgumentParser()
+
+parser.add_argument("-d", "--detailed", action="store_true")
+parser.add_argument("-t", "--timing", action="store_true")
+parser.add_argument("-m", "--maxtick", type=int)
+parser.add_argument("-n", "--numcpus",
+                    help="Number of cpus in total", type=int)
+parser.add_argument("-f", "--frequency",
+                    default = "1GHz",
+                    help="Frequency of each CPU")
+parser.add_argument("--l1size",
+                    default = "32kB")
+parser.add_argument("--l1latency",
+                    default = "1ns")
+parser.add_argument("--l2size",
+                    default = "256kB")
+parser.add_argument("--l2latency",
+                    default = "10ns")
+parser.add_argument("--rootdir",
+                    help="Root directory of Splash2",
+                    default="/dist/splash2/codes")
+parser.add_argument("-b", "--benchmark",
+                    help="Splash 2 benchmark to run")
+
+args = parser.parse_args()
+
+if not args.numcpus:
     print("Specify the number of cpus with -n")
     sys.exit(1)
 
@@ -76,86 +72,86 @@
 # Define Splash2 Benchmarks
 # ====================
 class Cholesky(Process):
-    cwd = options.rootdir + '/kernels/cholesky'
-    executable = options.rootdir + '/kernels/cholesky/CHOLESKY'
-    cmd = ['CHOLESKY', '-p' +  str(options.numcpus),
-            options.rootdir + '/kernels/cholesky/inputs/tk23.O']
+    cwd = args.rootdir + '/kernels/cholesky'
+    executable = args.rootdir + '/kernels/cholesky/CHOLESKY'
+    cmd = ['CHOLESKY', '-p' +  str(args.numcpus),
+            args.rootdir + '/kernels/cholesky/inputs/tk23.O']
 
 class FFT(Process):
-    cwd = options.rootdir + '/kernels/fft'
-    executable = options.rootdir + '/kernels/fft/FFT'
-    cmd = ['FFT', '-p', str(options.numcpus), '-m18']
+    cwd = args.rootdir + '/kernels/fft'
+    executable = args.rootdir + '/kernels/fft/FFT'
+    cmd = ['FFT', '-p', str(args.numcpus), '-m18']
 
 class LU_contig(Process):
-    executable = options.rootdir + '/kernels/lu/contiguous_blocks/LU'
-    cmd = ['LU', '-p', str(options.numcpus)]
-    cwd = options.rootdir + '/kernels/lu/contiguous_blocks'
+    executable = args.rootdir + '/kernels/lu/contiguous_blocks/LU'
+    cmd = ['LU', '-p', str(args.numcpus)]
+    cwd = args.rootdir + '/kernels/lu/contiguous_blocks'
 
 class LU_noncontig(Process):
-    executable = options.rootdir + '/kernels/lu/non_contiguous_blocks/LU'
-    cmd = ['LU', '-p', str(options.numcpus)]
-    cwd = options.rootdir + '/kernels/lu/non_contiguous_blocks'
+    executable = args.rootdir + '/kernels/lu/non_contiguous_blocks/LU'
+    cmd = ['LU', '-p', str(args.numcpus)]
+    cwd = args.rootdir + '/kernels/lu/non_contiguous_blocks'
 
 class Radix(Process):
-    executable = options.rootdir + '/kernels/radix/RADIX'
-    cmd = ['RADIX', '-n524288', '-p', str(options.numcpus)]
-    cwd = options.rootdir + '/kernels/radix'
+    executable = args.rootdir + '/kernels/radix/RADIX'
+    cmd = ['RADIX', '-n524288', '-p', str(args.numcpus)]
+    cwd = args.rootdir + '/kernels/radix'
 
 class Barnes(Process):
-    executable = options.rootdir + '/apps/barnes/BARNES'
+    executable = args.rootdir + '/apps/barnes/BARNES'
     cmd = ['BARNES']
-    input = options.rootdir + '/apps/barnes/input.p' + str(options.numcpus)
-    cwd = options.rootdir + '/apps/barnes'
+    input = args.rootdir + '/apps/barnes/input.p' + str(args.numcpus)
+    cwd = args.rootdir + '/apps/barnes'
 
 class FMM(Process):
-    executable = options.rootdir + '/apps/fmm/FMM'
+    executable = args.rootdir + '/apps/fmm/FMM'
     cmd = ['FMM']
-    if str(options.numcpus) == '1':
-        input = options.rootdir + '/apps/fmm/inputs/input.2048'
+    if str(args.numcpus) == '1':
+        input = args.rootdir + '/apps/fmm/inputs/input.2048'
     else:
-        input = options.rootdir + '/apps/fmm/inputs/input.2048.p' + str(options.numcpus)
-    cwd = options.rootdir + '/apps/fmm'
+        input = args.rootdir + '/apps/fmm/inputs/input.2048.p' + str(args.numcpus)
+    cwd = args.rootdir + '/apps/fmm'
 
 class Ocean_contig(Process):
-    executable = options.rootdir + '/apps/ocean/contiguous_partitions/OCEAN'
-    cmd = ['OCEAN', '-p', str(options.numcpus)]
-    cwd = options.rootdir + '/apps/ocean/contiguous_partitions'
+    executable = args.rootdir + '/apps/ocean/contiguous_partitions/OCEAN'
+    cmd = ['OCEAN', '-p', str(args.numcpus)]
+    cwd = args.rootdir + '/apps/ocean/contiguous_partitions'
 
 class Ocean_noncontig(Process):
-    executable = options.rootdir + '/apps/ocean/non_contiguous_partitions/OCEAN'
-    cmd = ['OCEAN', '-p', str(options.numcpus)]
-    cwd = options.rootdir + '/apps/ocean/non_contiguous_partitions'
+    executable = args.rootdir + '/apps/ocean/non_contiguous_partitions/OCEAN'
+    cmd = ['OCEAN', '-p', str(args.numcpus)]
+    cwd = args.rootdir + '/apps/ocean/non_contiguous_partitions'
 
 class Raytrace(Process):
-    executable = options.rootdir + '/apps/raytrace/RAYTRACE'
-    cmd = ['RAYTRACE', '-p' + str(options.numcpus),
-           options.rootdir + '/apps/raytrace/inputs/teapot.env']
-    cwd = options.rootdir + '/apps/raytrace'
+    executable = args.rootdir + '/apps/raytrace/RAYTRACE'
+    cmd = ['RAYTRACE', '-p' + str(args.numcpus),
+           args.rootdir + '/apps/raytrace/inputs/teapot.env']
+    cwd = args.rootdir + '/apps/raytrace'
 
 class Water_nsquared(Process):
-    executable = options.rootdir + '/apps/water-nsquared/WATER-NSQUARED'
+    executable = args.rootdir + '/apps/water-nsquared/WATER-NSQUARED'
     cmd = ['WATER-NSQUARED']
-    if options.numcpus==1:
-        input = options.rootdir + '/apps/water-nsquared/input'
+    if args.numcpus==1:
+        input = args.rootdir + '/apps/water-nsquared/input'
     else:
-        input = options.rootdir + '/apps/water-nsquared/input.p' + str(options.numcpus)
-    cwd = options.rootdir + '/apps/water-nsquared'
+        input = args.rootdir + '/apps/water-nsquared/input.p' + str(args.numcpus)
+    cwd = args.rootdir + '/apps/water-nsquared'
 
 class Water_spatial(Process):
-    executable = options.rootdir + '/apps/water-spatial/WATER-SPATIAL'
+    executable = args.rootdir + '/apps/water-spatial/WATER-SPATIAL'
     cmd = ['WATER-SPATIAL']
-    if options.numcpus==1:
-        input = options.rootdir + '/apps/water-spatial/input'
+    if args.numcpus==1:
+        input = args.rootdir + '/apps/water-spatial/input'
     else:
-        input = options.rootdir + '/apps/water-spatial/input.p' + str(options.numcpus)
-    cwd = options.rootdir + '/apps/water-spatial'
+        input = args.rootdir + '/apps/water-spatial/input.p' + str(args.numcpus)
+    cwd = args.rootdir + '/apps/water-spatial'
 
 # --------------------
 # Base L1 Cache Definition
 # ====================
 
 class L1(Cache):
-    latency = options.l1latency
+    latency = args.l1latency
     mshrs = 12
     tgts_per_mshr = 8
 
@@ -164,7 +160,7 @@ class L1(Cache):
 # ----------------------
 
 class L2(Cache):
-    latency = options.l2latency
+    latency = args.l2latency
     mshrs = 92
     tgts_per_mshr = 16
     write_buffers = 8
@@ -173,20 +169,20 @@ class L2(Cache):
 # Define the cpus
 # ----------------------
 
-busFrequency = Frequency(options.frequency)
+busFrequency = Frequency(args.frequency)
 
-if options.timing:
+if args.timing:
     cpus = [TimingSimpleCPU(cpu_id = i,
-                            clock=options.frequency)
-            for i in range(options.numcpus)]
-elif options.detailed:
+                            clock=args.frequency)
+            for i in range(args.numcpus)]
+elif args.detailed:
     cpus = [DerivO3CPU(cpu_id = i,
-                       clock=options.frequency)
-            for i in range(options.numcpus)]
+                       clock=args.frequency)
+            for i in range(args.numcpus)]
 else:
     cpus = [AtomicSimpleCPU(cpu_id = i,
-                            clock=options.frequency)
-            for i in range(options.numcpus)]
+                            clock=args.frequency)
+            for i in range(args.numcpus)]
 
 # ----------------------
 # Create a system, and add system wide objects
@@ -196,7 +192,7 @@ class L2(Cache):
 system.clock = '1GHz'
 
 system.toL2bus = L2XBar(clock = busFrequency)
-system.l2 = L2(size = options.l2size, assoc = 8)
+system.l2 = L2(size = args.l2size, assoc = 8)
 
 # ----------------------
 # Connect the L2 cache and memory together
@@ -211,8 +207,8 @@ class L2(Cache):
 # Connect the L2 cache and clusters together
 # ----------------------
 for cpu in cpus:
-    cpu.addPrivateSplitL1Caches(L1(size = options.l1size, assoc = 1),
-                                L1(size = options.l1size, assoc = 4))
+    cpu.addPrivateSplitL1Caches(L1(size = args.l1size, assoc = 1),
+                                L1(size = args.l1size, assoc = 4))
     # connect cpu level-1 caches to shared level-2 cache
     cpu.connectAllPorts(system.toL2bus, system.membus)
 
@@ -226,29 +222,29 @@ class L2(Cache):
 # --------------------
 # Pick the correct Splash2 Benchmarks
 # ====================
-if options.benchmark == 'Cholesky':
+if args.benchmark == 'Cholesky':
     root.workload = Cholesky()
-elif options.benchmark == 'FFT':
+elif args.benchmark == 'FFT':
     root.workload = FFT()
-elif options.benchmark == 'LUContig':
+elif args.benchmark == 'LUContig':
     root.workload = LU_contig()
-elif options.benchmark == 'LUNoncontig':
+elif args.benchmark == 'LUNoncontig':
     root.workload = LU_noncontig()
-elif options.benchmark == 'Radix':
+elif args.benchmark == 'Radix':
     root.workload = Radix()
-elif options.benchmark == 'Barnes':
+elif args.benchmark == 'Barnes':
     root.workload = Barnes()
-elif options.benchmark == 'FMM':
+elif args.benchmark == 'FMM':
     root.workload = FMM()
-elif options.benchmark == 'OceanContig':
+elif args.benchmark == 'OceanContig':
     root.workload = Ocean_contig()
-elif options.benchmark == 'OceanNoncontig':
+elif args.benchmark == 'OceanNoncontig':
     root.workload = Ocean_noncontig()
-elif options.benchmark == 'Raytrace':
+elif args.benchmark == 'Raytrace':
     root.workload = Raytrace()
-elif options.benchmark == 'WaterNSquared':
+elif args.benchmark == 'WaterNSquared':
     root.workload = Water_nsquared()
-elif options.benchmark == 'WaterSpatial':
+elif args.benchmark == 'WaterSpatial':
     root.workload = Water_spatial()
 else:
     print("The --benchmark environment variable was set to something "
@@ -270,15 +266,15 @@ class L2(Cache):
 # Run the simulation
 # ----------------------
 
-if options.timing or options.detailed:
+if args.timing or args.detailed:
     root.system.mem_mode = 'timing'
 
 # instantiate configuration
 m5.instantiate()
 
 # simulate until program terminates
-if options.maxtick:
-    exit_event = m5.simulate(options.maxtick)
+if args.maxtick:
+    exit_event = m5.simulate(args.maxtick)
 else:
     exit_event = m5.simulate(m5.MaxTick)
 
diff --git a/tests/configs/gpu-randomtest-ruby.py b/tests/configs/gpu-randomtest-ruby.py
index 246affa7eb..0d1171c7c4 100644
--- a/tests/configs/gpu-randomtest-ruby.py
+++ b/tests/configs/gpu-randomtest-ruby.py
@@ -37,47 +37,47 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys
 
 m5.util.addToPath('../configs/')
 
 from ruby import Ruby
 from common import Options
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 
 # add the gpu specific options expected by the the gpu and gpu_RfO
-parser.add_option("-u", "--num-compute-units", type="int", default=8,
-                  help="number of compute units in the GPU")
-parser.add_option("--num-cp", type="int", default=0,
-                  help="Number of GPU Command Processors (CP)")
-parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
-                  "per CU")
-parser.add_option("--wf-size", type="int", default=64,
-                  help="Wavefront size(in workitems)")
-parser.add_option("--wfs-per-simd", type="int", default=10, help="Number of " \
-                  "WF slots per SIMD")
+parser.add_argument("-u", "--num-compute-units", type=int, default=8,
+                    help="number of compute units in the GPU")
+parser.add_argument("--num-cp", type=int, default=0,
+                    help="Number of GPU Command Processors (CP)")
+parser.add_argument("--simds-per-cu", type=int, default=4, help="SIMD units" \
+                    " per CU")
+parser.add_argument("--wf-size", type=int, default=64,
+                    help="Wavefront size(in workitems)")
+parser.add_argument("--wfs-per-simd", type=int, default=10, help="Number of " \
+                    "WF slots per SIMD")
 
 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)
 
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
-options.num_compute_units=8
-options.num_sqc=2
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2
+args.num_compute_units=8
+args.num_sqc=2
 
 # Check to for the GPU_RfO protocol.  Other GPU protocols are non-SC and will
 # not work with the Ruby random tester.
@@ -87,14 +87,14 @@
 # create the tester and system, including ruby
 #
 tester = RubyTester(check_flush = False, checks_to_complete = 100,
-                    wakeup_frequency = 10, num_cpus = options.num_cpus)
+                    wakeup_frequency = 10, num_cpus = args.num_cpus)
 
 # We set the testers as cpu for ruby to find the correct clock domains
 # for the L1 Objects.
 system = System(cpu = tester)
 
 # Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 system.clk_domain = SrcClockDomain(clock = '1GHz',
                                    voltage_domain = system.voltage_domain)
 
@@ -105,8 +105,8 @@
 # is stored in system.cpu. because there is only ever one
 # tester object, num_cpus is not necessarily equal to the
 # size of system.cpu
-cpu_list = [ system.cpu ] * options.num_cpus
-Ruby.create_system(options, False, system, cpus=cpu_list)
+cpu_list = [ system.cpu ] * args.num_cpus
+Ruby.create_system(args, False, system, cpus=cpu_list)
 
 # Create a separate clock domain for Ruby
 system.ruby.clk_domain = SrcClockDomain(clock = '1GHz',
diff --git a/tests/configs/gpu-ruby.py b/tests/configs/gpu-ruby.py
index b561d02281..fc8f47896a 100644
--- a/tests/configs/gpu-ruby.py
+++ b/tests/configs/gpu-ruby.py
@@ -37,7 +37,7 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys, math, glob
+import os, argparse, sys, math, glob
 
 m5.util.addToPath('../configs/')
 
@@ -45,26 +45,6 @@
 from common import Options
 from common import GPUTLBOptions, GPUTLBConfig
 
-########################## Script Options ########################
-def setOption(parser, opt_str, value = 1):
-    # check to make sure the option actually exists
-    if not parser.has_option(opt_str):
-        raise Exception("cannot find %s in list of possible options" % opt_str)
-
-    opt = parser.get_option(opt_str)
-    # set the value
-    exec("parser.values.%s = %s" % (opt.dest, value))
-
-def getOption(parser, opt_str):
-    # check to make sure the option actually exists
-    if not parser.has_option(opt_str):
-        raise Exception("cannot find %s in list of possible options" % opt_str)
-
-    opt = parser.get_option(opt_str)
-    # get the value
-    exec("return_value = parser.values.%s" % opt.dest)
-    return return_value
-
 def run_test(root):
     """gpu test requires a specialized run_test implementation to set up the
     mmio space."""
@@ -79,100 +59,129 @@ def run_test(root):
     exit_event = m5.simulate(maxtick)
     print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 Options.addSEOptions(parser)
 
-parser.add_option("-k", "--kernel-files",
-                  help="file(s) containing GPU kernel code (colon separated)")
-parser.add_option("-u", "--num-compute-units", type="int", default=2,
-                  help="number of GPU compute units"),
-parser.add_option("--num-cp", type="int", default=0,
-                  help="Number of GPU Command Processors (CP)")
-parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
-                  "per CU")
-parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs" \
-                  "sharing an SQC (icache, and thus icache TLB)")
-parser.add_option("--wf-size", type="int", default=64,
-                  help="Wavefront size(in workitems)")
-parser.add_option("--wfs-per-simd", type="int", default=8, help="Number of " \
-                  "WF slots per SIMD")
-parser.add_option("--sp-bypass-path-length", type="int", default=4, \
-                  help="Number of stages of bypass path in vector ALU for Single "\
-                  "Precision ops")
-parser.add_option("--dp-bypass-path-length", type="int", default=4, \
-                  help="Number of stages of bypass path in vector ALU for Double "\
-                  "Precision ops")
-parser.add_option("--issue-period", type="int", default=4, \
-                  help="Number of cycles per vector instruction issue period")
-parser.add_option("--glbmem-wr-bus-width", type="int", default=32, \
-                  help="VGPR to Coalescer (Global Memory) data bus width in bytes")
-parser.add_option("--glbmem-rd-bus-width", type="int", default=32, \
-                  help="Coalescer to VGPR (Global Memory) data bus width in bytes")
-parser.add_option("--shr-mem-pipes-per-cu", type="int", default=1, \
-                  help="Number of Shared Memory pipelines per CU")
-parser.add_option("--glb-mem-pipes-per-cu", type="int", default=1, \
-                  help="Number of Global Memory pipelines per CU")
-parser.add_option("--vreg-file-size", type="int", default=2048,
-                  help="number of physical vector registers per SIMD")
-parser.add_option("--bw-scalor", type="int", default=0,
-                  help="bandwidth scalor for scalability analysis")
-parser.add_option("--CPUClock", type="string", default="2GHz",
-                  help="CPU clock")
-parser.add_option("--GPUClock", type="string", default="1GHz",
-                  help="GPU clock")
-parser.add_option("--cpu-voltage", action="store", type="string",
-                  default='1.0V',
-                  help = """CPU  voltage domain""")
-parser.add_option("--gpu-voltage", action="store", type="string",
-                  default='1.0V',
-                  help = """CPU  voltage domain""")
-parser.add_option("--CUExecPolicy", type="string", default="OLDEST-FIRST",
-                  help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
-parser.add_option("--xact-cas-mode", action="store_true",
-                  help="enable load_compare mode (transactional CAS)")
-parser.add_option("--SegFaultDebug",action="store_true",
-                 help="checks for GPU seg fault before TLB access")
-parser.add_option("--LocalMemBarrier",action="store_true",
-                 help="Barrier does not wait for writethroughs to complete")
-parser.add_option("--countPages", action="store_true",
-                 help="Count Page Accesses and output in per-CU output files")
-parser.add_option("--TLB-prefetch", type="int", help = "prefetch depth for"\
-                  "TLBs")
-parser.add_option("--pf-type", type="string", help="type of prefetch: "\
-                  "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
-parser.add_option("--pf-stride", type="int", help="set prefetch stride")
-parser.add_option("--numLdsBanks", type="int", default=32,
-                  help="number of physical banks per LDS module")
-parser.add_option("--ldsBankConflictPenalty", type="int", default=1,
-                  help="number of cycles per LDS bank conflict")
+parser.add_argument(
+    "-k", "--kernel-files",
+    help="file(s) containing GPU kernel code (colon separated)")
+parser.add_argument(
+    "-u", "--num-compute-units", type=int, default=2,
+    help="number of GPU compute units")
+parser.add_argument(
+    "--num-cp", type=int, default=0,
+    help="Number of GPU Command Processors (CP)")
+parser.add_argument(
+    "--simds-per-cu", type=int, default=4,
+    help="SIMD units per CU")
+parser.add_argument(
+    "--cu-per-sqc", type=int, default=4,
+    help="number of CUs sharing an SQC (icache, and thus icache TLB)")
+parser.add_argument(
+    "--wf-size", type=int, default=64,
+    help="Wavefront size(in workitems)")
+parser.add_argument(
+    "--wfs-per-simd", type=int, default=8, help="Number of " \
+    "WF slots per SIMD")
+parser.add_argument(
+    "--sp-bypass-path-length", type=int, default=4,
+    help="Number of stages of bypass path in vector ALU for Single "
+    "Precision ops")
+parser.add_argument(
+    "--dp-bypass-path-length", type=int, default=4,
+    help="Number of stages of bypass path in vector ALU for Double "
+    "Precision ops")
+parser.add_argument(
+    "--issue-period", type=int, default=4,
+    help="Number of cycles per vector instruction issue period")
+parser.add_argument(
+    "--glbmem-wr-bus-width", type=int, default=32,
+    help="VGPR to Coalescer (Global Memory) data bus width in bytes")
+parser.add_argument(
+    "--glbmem-rd-bus-width", type=int, default=32,
+    help="Coalescer to VGPR (Global Memory) data bus width in bytes")
+parser.add_argument(
+    "--shr-mem-pipes-per-cu", type=int, default=1, \
+    help="Number of Shared Memory pipelines per CU")
+parser.add_argument(
+    "--glb-mem-pipes-per-cu", type=int, default=1, \
+    help="Number of Global Memory pipelines per CU")
+parser.add_argument(
+    "--vreg-file-size", type=int, default=2048,
+    help="number of physical vector registers per SIMD")
+parser.add_argument(
+    "--bw-scalor", type=int, default=0,
+    help="bandwidth scalor for scalability analysis")
+parser.add_argument(
+    "--CPUClock", type=str, default="2GHz",
+    help="CPU clock")
+parser.add_argument(
+    "--GPUClock", type=str, default="1GHz",
+    help="GPU clock")
+parser.add_argument(
+    "--cpu-voltage", action="store", type=str,
+    default='1.0V',
+    help = """CPU  voltage domain""")
+parser.add_argument(
+    "--gpu-voltage", action="store", type=str,
+    default='1.0V',
+    help = """GPU  voltage domain""")
+parser.add_argument(
+    "--CUExecPolicy", type=str, default="OLDEST-FIRST",
+    help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
+parser.add_argument(
+    "--xact-cas-mode", action="store_true",
+    help="enable load_compare mode (transactional CAS)")
+parser.add_argument(
+    "--SegFaultDebug",action="store_true",
+    help="checks for GPU seg fault before TLB access")
+parser.add_argument(
+    "--LocalMemBarrier",action="store_true",
+    help="Barrier does not wait for writethroughs to complete")
+parser.add_argument(
+    "--countPages", action="store_true",
+    help="Count Page Accesses and output in per-CU output files")
+parser.add_argument(
+    "--TLB-prefetch", type=int,
+    help="prefetch depth for TLBs")
+parser.add_argument(
+    "--pf-type", type=str, help="type of prefetch: "\
+    "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
+parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
+parser.add_argument(
+    "--numLdsBanks", type=int, default=32,
+    help="number of physical banks per LDS module")
+parser.add_argument(
+    "--ldsBankConflictPenalty", type=int, default=1,
+    help="number of cycles per LDS bank conflict")
 
 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)
 
 GPUTLBOptions.tlb_options(parser)
 
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 # The GPU cache coherence protocols only work with the backing store
-setOption(parser, "--access-backing-store")
+args.access_backing_store = True
 
 # Currently, the sqc (I-Cache of GPU) is shared by
 # multiple compute units(CUs). The protocol works just fine
 # even if sqc is not shared. Overriding this option here
 # so that the user need not explicitly set this (assuming
 # sharing sqc is the common usage)
-n_cu = options.num_compute_units
-num_sqc = int(math.ceil(float(n_cu) / options.cu_per_sqc))
-options.num_sqc = num_sqc # pass this to Ruby
+n_cu = args.num_compute_units
+num_sqc = int(math.ceil(float(n_cu) / args.cu_per_sqc))
+args.num_sqc = num_sqc # pass this to Ruby
 
 ########################## Creating the GPU system ########################
 # shader is the GPU
-shader = Shader(n_wf = options.wfs_per_simd,
+shader = Shader(n_wf = args.wfs_per_simd,
                 clk_domain = SrcClockDomain(
-                    clock = options.GPUClock,
+                    clock = args.GPUClock,
                     voltage_domain = VoltageDomain(
-                        voltage = options.gpu_voltage)),
+                        voltage = args.gpu_voltage)),
                 timing = True)
 
 # GPU_RfO(Read For Ownership) implements SC/TSO memory model.
@@ -190,51 +199,51 @@ def run_test(root):
 
 # Switching off per-lane TLB by default
 per_lane = False
-if options.TLB_config == "perLane":
+if args.TLB_config == "perLane":
     per_lane = True
 
 # List of compute units; one GPU can have multiple compute units
 compute_units = []
 for i in range(n_cu):
     compute_units.append(ComputeUnit(cu_id = i, perLaneTLB = per_lane,
-                                     num_SIMDs = options.simds_per_cu,
-                                     wfSize = options.wf_size,
+                                     num_SIMDs = args.simds_per_cu,
+                                     wfSize = args.wf_size,
                                      spbypass_pipe_length = \
-                                     options.sp_bypass_path_length,
+                                     args.sp_bypass_path_length,
                                      dpbypass_pipe_length = \
-                                     options.dp_bypass_path_length,
-                                     issue_period = options.issue_period,
+                                     args.dp_bypass_path_length,
+                                     issue_period = args.issue_period,
                                      coalescer_to_vrf_bus_width = \
-                                     options.glbmem_rd_bus_width,
+                                     args.glbmem_rd_bus_width,
                                      vrf_to_coalescer_bus_width = \
-                                     options.glbmem_wr_bus_width,
+                                     args.glbmem_wr_bus_width,
                                      num_global_mem_pipes = \
-                                     options.glb_mem_pipes_per_cu,
+                                     args.glb_mem_pipes_per_cu,
                                      num_shared_mem_pipes = \
-                                     options.shr_mem_pipes_per_cu,
-                                     n_wf = options.wfs_per_simd,
-                                     execPolicy = options.CUExecPolicy,
-                                     xactCasMode = options.xact_cas_mode,
-                                     debugSegFault = options.SegFaultDebug,
+                                     args.shr_mem_pipes_per_cu,
+                                     n_wf = args.wfs_per_simd,
+                                     execPolicy = args.CUExecPolicy,
+                                     xactCasMode = args.xact_cas_mode,
+                                     debugSegFault = args.SegFaultDebug,
                                      functionalTLB = True,
-                                     localMemBarrier = options.LocalMemBarrier,
-                                     countPages = options.countPages,
+                                     localMemBarrier = args.LocalMemBarrier,
+                                     countPages = args.countPages,
                                      localDataStore = \
-                                     LdsState(banks = options.numLdsBanks,
+                                     LdsState(banks = args.numLdsBanks,
                                               bankConflictPenalty = \
-                                              options.ldsBankConflictPenalty)))
+                                              args.ldsBankConflictPenalty)))
     wavefronts = []
     vrfs = []
-    for j in range(options.simds_per_cu):
+    for j in range(args.simds_per_cu):
         for k in range(int(shader.n_wf)):
             wavefronts.append(Wavefront(simdId = j, wf_slot_id = k))
         vrfs.append(VectorRegisterFile(simd_id=j,
-                              num_regs_per_simd=options.vreg_file_size))
+                              num_regs_per_simd=args.vreg_file_size))
     compute_units[-1].wavefronts = wavefronts
     compute_units[-1].vector_register_file = vrfs
-    if options.TLB_prefetch:
-        compute_units[-1].prefetch_depth = options.TLB_prefetch
-        compute_units[-1].prefetch_prev_type = options.pf_type
+    if args.TLB_prefetch:
+        compute_units[-1].prefetch_depth = args.TLB_prefetch
+        compute_units[-1].prefetch_prev_type = args.pf_type
 
     # attach the LDS and the CU to the bus (actually a Bridge)
     compute_units[-1].ldsPort = compute_units[-1].ldsBus.slave
@@ -245,7 +254,7 @@ def run_test(root):
 
 # this is a uniprocessor only test, thus the shader is the second index in the
 # list of "system.cpus"
-options.num_cpus = 1
+args.num_cpus = 1
 shader_idx = 1
 cpu = TimingSimpleCPU(cpu_id=0)
 
@@ -258,12 +267,12 @@ def run_test(root):
 cpu_list = [cpu] + [shader] + [dispatcher]
 
 system = System(cpu = cpu_list,
-                mem_ranges = [AddrRange(options.mem_size)],
+                mem_ranges = [AddrRange(args.mem_size)],
                 mem_mode = 'timing',
                 workload = SEWorkload())
 
 # Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 system.clk_domain = SrcClockDomain(clock = '1GHz',
                                    voltage_domain = system.voltage_domain)
 
@@ -274,15 +283,15 @@ def run_test(root):
                                           system.voltage_domain)
 
 # configure the TLB hierarchy
-GPUTLBConfig.config_tlb_hierarchy(options, system, shader_idx)
+GPUTLBConfig.config_tlb_hierarchy(args, system, shader_idx)
 
 # create Ruby system
 system.piobus = IOXBar(width=32, response_latency=0,
                        frontend_latency=0, forward_latency=0)
-Ruby.create_system(options, None, system)
+Ruby.create_system(args, None, system)
 
 # Create a separate clock for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                         voltage_domain = system.voltage_domain)
 
 # create the interrupt controller
@@ -303,10 +312,10 @@ def run_test(root):
 # per compute unit and one sequencer per SQC for the math to work out
 # correctly.
 gpu_port_idx = len(system.ruby._cpu_ports) \
-               - options.num_compute_units - options.num_sqc
-gpu_port_idx = gpu_port_idx - options.num_cp * 2
+               - args.num_compute_units - args.num_sqc
+gpu_port_idx = gpu_port_idx - args.num_cp * 2
 
-wavefront_size = options.wf_size
+wavefront_size = args.wf_size
 for i in range(n_cu):
     # The pipeline issues wavefront_size number of uncoalesced requests
     # in one GPU issue cycle. Hence wavefront_size mem ports.
@@ -316,14 +325,14 @@ def run_test(root):
     gpu_port_idx += 1
 
 for i in range(n_cu):
-    if i > 0 and not i % options.cu_per_sqc:
+    if i > 0 and not i % args.cu_per_sqc:
         gpu_port_idx += 1
     system.cpu[shader_idx].CUs[i].sqc_port = \
             system.ruby._cpu_ports[gpu_port_idx].slave
 gpu_port_idx = gpu_port_idx + 1
 
 # Current regression tests do not support the command processor
-assert(options.num_cp == 0)
+assert(args.num_cp == 0)
 
 # connect dispatcher to the system.piobus
 dispatcher.pio = system.piobus.master
diff --git a/tests/configs/memtest-ruby.py b/tests/configs/memtest-ruby.py
index 7aca77f951..f8fbd10639 100644
--- a/tests/configs/memtest-ruby.py
+++ b/tests/configs/memtest-ruby.py
@@ -29,34 +29,34 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys
 
 m5.util.addToPath('../configs/')
 
 from ruby import Ruby
 from common import Options
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 
 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)
 
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
-options.ports=32
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2
+args.ports=32
 
 #MAX CORES IS 8 with the fals sharing method
 nb_cores = 8
@@ -66,8 +66,8 @@
                  percent_uncacheable=0, suppress_func_errors=True) \
          for i in range(nb_cores) ]
 
-# overwrite options.num_cpus with the nb_cores value
-options.num_cpus = nb_cores
+# overwrite args.num_cpus with the nb_cores value
+args.num_cpus = nb_cores
 
 # system simulated
 system = System(cpu = cpus)
@@ -87,10 +87,10 @@
 
 system.mem_ranges = AddrRange('256MB')
 
-Ruby.create_system(options, False, system)
+Ruby.create_system(args, False, system)
 
 # Create a separate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                         voltage_domain = system.voltage_domain)
 
 assert(len(cpus) == len(system.ruby._cpu_ports))
diff --git a/tests/configs/pc-simple-timing-ruby.py b/tests/configs/pc-simple-timing-ruby.py
index 884fd7da28..16dc72806e 100644
--- a/tests/configs/pc-simple-timing-ruby.py
+++ b/tests/configs/pc-simple-timing-ruby.py
@@ -24,7 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import m5, os, optparse, sys
+import m5, os, argparse, sys
 from m5.objects import *
 m5.util.addToPath('../configs/')
 from common.Benchmarks import SysConfig
@@ -33,28 +33,28 @@
 from common import Options
 
 # Add the ruby specific and protocol specific options
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 Ruby.define_options(parser)
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
-options.l1d_size="32kB"
-options.l1i_size="32kB"
-options.l2_size="4MB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.num_cpus = 2
+args.l1d_size="32kB"
+args.l1i_size="32kB"
+args.l2_size="4MB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.num_cpus = 2
 
 #the system
 mdesc = SysConfig(disks = ['linux-x86.img'])
-system = FSConfig.makeLinuxX86System('timing', options.num_cpus,
+system = FSConfig.makeLinuxX86System('timing', args.num_cpus,
                                      mdesc=mdesc, Ruby=True)
 system.kernel = SysPaths.binary('x86_64-vmlinux-2.6.22.9')
 # Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 
 system.kernel = FSConfig.binary('x86_64-vmlinux-2.6.22.9.smp')
 system.clk_domain = SrcClockDomain(clock = '1GHz',
@@ -62,12 +62,12 @@
 system.cpu_clk_domain = SrcClockDomain(clock = '2GHz',
                                        voltage_domain = system.voltage_domain)
 system.cpu = [TimingSimpleCPU(cpu_id=i, clk_domain = system.cpu_clk_domain)
-              for i in range(options.num_cpus)]
+              for i in range(args.num_cpus)]
 
-Ruby.create_system(options, True, system, system.iobus, system._dma_ports)
+Ruby.create_system(args, True, system, system.iobus, system._dma_ports)
 
 # Create a seperate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                         voltage_domain = system.voltage_domain)
 
 # Connect the ruby io port to the PIO bus,
diff --git a/tests/configs/rubytest-ruby.py b/tests/configs/rubytest-ruby.py
index 0db7e38060..409278d72d 100644
--- a/tests/configs/rubytest-ruby.py
+++ b/tests/configs/rubytest-ruby.py
@@ -29,34 +29,34 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys
 
 m5.util.addToPath('../configs/')
 
 from ruby import Ruby
 from common import Options
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addNoISAOptions(parser)
 
 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)
 
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
-options.ports=32
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2
+args.ports=32
 
 # Turn on flush check for the hammer protocol
 check_flush = False
@@ -67,14 +67,14 @@
 # create the tester and system, including ruby
 #
 tester = RubyTester(check_flush = check_flush, checks_to_complete = 100,
-                    wakeup_frequency = 10, num_cpus = options.num_cpus)
+                    wakeup_frequency = 10, num_cpus = args.num_cpus)
 
 # We set the testers as cpu for ruby to find the correct clock domains
 # for the L1 Objects.
 system = System(cpu = tester)
 
 # Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 system.clk_domain = SrcClockDomain(clock = '1GHz',
                                    voltage_domain = system.voltage_domain)
 
@@ -85,14 +85,14 @@
 # is stored in system.cpu. because there is only ever one
 # tester object, num_cpus is not necessarily equal to the
 # size of system.cpu
-cpu_list = [ system.cpu ] * options.num_cpus
-Ruby.create_system(options, False, system, cpus=cpu_list)
+cpu_list = [ system.cpu ] * args.num_cpus
+Ruby.create_system(args, False, system, cpus=cpu_list)
 
 # Create a separate clock domain for Ruby
 system.ruby.clk_domain = SrcClockDomain(clock = '1GHz',
                                         voltage_domain = system.voltage_domain)
 
-assert(options.num_cpus == len(system.ruby._cpu_ports))
+assert(args.num_cpus == len(system.ruby._cpu_ports))
 
 tester.num_cpus = len(system.ruby._cpu_ports)
 
diff --git a/tests/configs/simple-timing-mp-ruby.py b/tests/configs/simple-timing-mp-ruby.py
index 3657506ddd..4218495f3a 100644
--- a/tests/configs/simple-timing-mp-ruby.py
+++ b/tests/configs/simple-timing-mp-ruby.py
@@ -28,39 +28,39 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys
 
 m5.util.addToPath('../configs/')
 
 from common import Options
 from ruby import Ruby
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 
 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)
 
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2
 
 nb_cores = 4
 cpus = [ TimingSimpleCPU(cpu_id=i) for i in range(nb_cores) ]
 
 # overwrite the num_cpus to equal nb_cores
-options.num_cpus = nb_cores
+args.num_cpus = nb_cores
 
 # system simulated
 system = System(cpu = cpus, clk_domain = SrcClockDomain(clock = '1GHz'))
@@ -69,12 +69,12 @@
 # CPUs frequency
 system.cpu.clk_domain = SrcClockDomain(clock = '2GHz')
 
-Ruby.create_system(options, False, system)
+Ruby.create_system(args, False, system)
 
 # Create a separate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock)
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock)
 
-assert(options.num_cpus == len(system.ruby._cpu_ports))
+assert(args.num_cpus == len(system.ruby._cpu_ports))
 
 for (i, cpu) in enumerate(system.cpu):
     # create the interrupt controller
diff --git a/tests/configs/simple-timing-ruby.py b/tests/configs/simple-timing-ruby.py
index 4c5cdc05e2..d0ef6c560a 100644
--- a/tests/configs/simple-timing-ruby.py
+++ b/tests/configs/simple-timing-ruby.py
@@ -28,41 +28,41 @@
 from m5.objects import *
 from m5.defines import buildEnv
 from m5.util import addToPath
-import os, optparse, sys
+import os, argparse, sys
 
 m5.util.addToPath('../configs/')
 
 from ruby import Ruby
 from common import Options
 
-parser = optparse.OptionParser()
+parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 
 # Add the ruby specific and protocol specific options
 Ruby.define_options(parser)
 
-(options, args) = parser.parse_args()
+args = parser.parse_args()
 
 #
 # Set the default cache size and associativity to be very small to encourage
 # races between requests and writebacks.
 #
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
+args.l1d_size="256B"
+args.l1i_size="256B"
+args.l2_size="512B"
+args.l3_size="1kB"
+args.l1d_assoc=2
+args.l1i_assoc=2
+args.l2_assoc=2
+args.l3_assoc=2
 
 # this is a uniprocessor only test
-options.num_cpus = 1
+args.num_cpus = 1
 cpu = TimingSimpleCPU(cpu_id=0)
 system = System(cpu = cpu)
 
 # Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
 system.clk_domain = SrcClockDomain(clock = '1GHz',
                                    voltage_domain = system.voltage_domain)
 
@@ -72,10 +72,10 @@
                                        voltage_domain = system.voltage_domain)
 
 system.mem_ranges = AddrRange('256MB')
-Ruby.create_system(options, False, system)
+Ruby.create_system(args, False, system)
 
 # Create a separate clock for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
+system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                         voltage_domain = system.voltage_domain)
 
 assert(len(system.ruby._cpu_ports) == 1)
diff --git a/tests/gem5/configs/base_config.py b/tests/gem5/configs/base_config.py
index 80bfa2d3db..260324dbc0 100644
--- a/tests/gem5/configs/base_config.py
+++ b/tests/gem5/configs/base_config.py
@@ -34,7 +34,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from abc import ABCMeta, abstractmethod
-import optparse
+import argparse
 import m5
 from m5.objects import *
 from m5.proxy import *
@@ -164,29 +164,29 @@ def init_system(self, system):
 
         if self.use_ruby:
             # Add the ruby specific and protocol specific options
-            parser = optparse.OptionParser()
+            parser = argparse.ArgumentParser()
             Options.addCommonOptions(parser)
             Ruby.define_options(parser)
-            (options, args) = parser.parse_args()
+            args, extra = parser.parse_known_args()
 
             # Set the default cache size and associativity to be very
             # small to encourage races between requests and writebacks.
-            options.l1d_size="32kB"
-            options.l1i_size="32kB"
-            options.l2_size="4MB"
-            options.l1d_assoc=4
-            options.l1i_assoc=2
-            options.l2_assoc=8
-            options.num_cpus = self.num_cpus
-            options.num_dirs = 2
+            args.l1d_size="32kB"
+            args.l1i_size="32kB"
+            args.l2_size="4MB"
+            args.l1d_assoc=4
+            args.l1i_assoc=2
+            args.l2_assoc=8
+            args.num_cpus = self.num_cpus
+            args.num_dirs = 2
 
             bootmem = getattr(system, '_bootmem', None)
-            Ruby.create_system(options, True, system, system.iobus,
+            Ruby.create_system(args, True, system, system.iobus,
                                system._dma_ports, bootmem)
 
             # Create a seperate clock domain for Ruby
             system.ruby.clk_domain = SrcClockDomain(
-                clock = options.ruby_clock,
+                clock = args.ruby_clock,
                 voltage_domain = system.voltage_domain)
             for i, cpu in enumerate(system.cpu):
                 if not cpu.switched_out:
diff --git a/tests/gem5/x86-boot-tests/run_exit.py b/tests/gem5/x86-boot-tests/run_exit.py
index ecb3a8454a..89685652f7 100644
--- a/tests/gem5/x86-boot-tests/run_exit.py
+++ b/tests/gem5/x86-boot-tests/run_exit.py
@@ -43,7 +43,6 @@
 parser.add_argument('--num-cpus', type=int)
 parser.add_argument('--boot-type', choices=['init', 'systemd',])
 
-#(options, args) = parser.parse_args()
 args = parser.parse_args()
 
 # create the system we are going to simulate