@@ -273,6 +273,13 @@ def SLR_generate_unit_dictionaries(units_string, strictness):
273273 prefixes_long_to_short [prefix ]+ units_long_to_short [unit ]: prefix + units [unit ]
274274 }
275275 )
276+ if prefix + units_long_to_short [unit ] not in units_short_to_long .keys ():
277+ prefixed_units .update (
278+ {
279+ prefix + units_long_to_short [unit ]: prefix + units [unit ]
280+ }
281+ )
282+
276283
277284 prefixed_units_end = {** units_end }
278285 for unit in units_end .keys ():
@@ -486,3 +493,104 @@ def SLR_quantity_parsing(expr, parameters, parser, name):
486493
487494 tag_handler = set_tags (parameters .get ("strictness" , "strict" ))
488495 return PhysicalQuantity (name , parameters , quantity [0 ], parser , messages = [], tag_handler = tag_handler )
496+
def expression_preprocess(name, expr, parameters):
    """
    Rewrite `expr` before it is parsed, choosing the rewrite from the
    "strictness" entry of `parameters` (treated as "natural" when absent).

    With strictness "legacy" the expression goes through `preprocess_legacy`;
    with any other value it goes through `transform_prefixes_to_standard`.

    `name` is accepted but not read here.

    Returns the tuple `(True, rewritten_expr, None)`.
    NOTE(review): the first and last entries look like a success flag and a
    feedback slot - confirm against the caller.
    """
    use_legacy = parameters.get("strictness", "natural") == "legacy"
    if use_legacy:
        processed = preprocess_legacy(expr, parameters)
    else:
        processed = transform_prefixes_to_standard(expr)
    return True, processed, None
505+
def preprocess_legacy(expr, parameters):
    """
    Rewrite the spacing between numbers, prefixes and units in `expr` using
    the string substitutions of the "legacy" strictness mode.

    Six rewrites are applied in a fixed order, each one repeated until its
    pattern no longer matches:

    1. "*UNIT" (long form, but not "**UNIT"): the "*" becomes a space.
    2. "PREFIX UNIT" (long forms): the separating space is removed.
    3. A space is inserted between a digit, "*", "(" or ")" and a short form
       that directly follows it.
    4. The space after a prefix short form is removed.
    5. The "* " after a prefix short form is removed.
    6. "* UNIT" (short form): the "*" becomes a space.

    `expr` is the string to rewrite. `parameters` is accepted but not read
    here. Returns the rewritten string.

    NOTE(review): names are joined into the regular expressions without
    `re.escape`, so this assumes no prefix or unit name contains regex
    metacharacters - confirm against the unit tables.
    """
    # Only the name (index 0), short form (index 1) and the last field of each
    # prefix entry are read below; the field at index 2 is replaced by an
    # empty tuple.
    prefix_data = {(p[0], p[1], tuple(), p[3]) for p in set_of_SI_prefixes}
    prefix_long_forms = []
    for prefix in prefix_data:
        prefix_long_forms.append(prefix[0])
        prefix_long_forms.extend(prefix[-1])
    prefix_short_forms = [prefix[1] for prefix in prefix_data]
    unit_data = set_of_SI_base_unit_dimensions \
        | set_of_derived_SI_units_in_SI_base_units \
        | set_of_common_units_in_SI \
        | set_of_very_common_units_in_SI \
        | set_of_imperial_units
    # The long-form alternation lists every prefix name first, then for each
    # unit its name followed by the contents of its last two fields.
    # A copy is taken so the prefix-only list stays intact for rewrite 2.
    long_forms = list(prefix_long_forms)
    for unit in unit_data:
        long_forms.append(unit[0])
        long_forms.extend(unit[-2])
        long_forms.extend(unit[-1])
    unit_short_forms = [u[1] for u in unit_data]

    long_form_pattern = "(" + "|".join(long_forms) + ")"
    prefix_long_pattern = "(" + "|".join(prefix_long_forms) + ")"
    short_form_pattern = "(" + "|".join(set(prefix_short_forms + unit_short_forms)) + ")"
    prefix_short_pattern = "(" + "|".join(prefix_short_forms) + ")"
    unit_short_pattern = "(" + "|".join(unit_short_forms) + ")"

    # Rewrite any expression on the form "*UNIT" (but not "**UNIT") as " UNIT"
    # Example: "newton*metre" ---> "newton metre"
    # The search runs on expr[1:], so the first character of expr is never part
    # of a match and every span has to be shifted by one when slicing expr.
    search_string = r"(?<!\*)\* *" + long_form_pattern
    match_content = re.search(search_string, expr[1:])
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[:start + 1] + match_content.group().replace("*", " ") + expr[end + 1:]
        match_content = re.search(search_string, expr[1:])

    # Rewrite any expression on the form "PREFIX UNIT" as " PREFIXUNIT"
    # Example: "kilo metre" ---> " kilometre" (a space is put in front of the joined name)
    search_string = prefix_long_pattern + " " + long_form_pattern
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[:start] + " " + "".join(match_content.group().split()) + expr[end:]
        match_content = re.search(search_string, expr)

    # Add space before short forms of prefixes or unit names if they are preceded by numbers or multiplication
    # Example: "100Pa" ---> "100 Pa"
    search_string = r"[0-9\*\(\)]" + short_form_pattern
    match_content = re.search(search_string, expr)
    while match_content is not None:
        split_at = match_content.span()[0] + 1
        expr = expr[:split_at] + " " + expr[split_at:]
        match_content = re.search(search_string, expr)

    # Remove space after prefix short forms if they are preceded by numbers, multiplication or space
    # Example: "100 m Pa" ---> "100  mPa"
    # NOTE(review): the matched leading character is emitted twice (once by the
    # slice, once by the match text), so a leading space ends up doubled.
    search_string = r"[0-9\*\(\) ]" + prefix_short_pattern + " "
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[:start + 1] + match_content.group()[:-1] + expr[end:]
        match_content = re.search(search_string, expr)

    # Remove multiplication and space after prefix short forms if they are preceded by numbers, multiplication or space
    # Example: "100 m* Pa" ---> "100  mPa" (leading character doubled as above)
    # The trailing piece is a raw string so that "\*" reaches the regex engine
    # without relying on an invalid string escape sequence.
    search_string = r"[0-9\*\(\) ]" + prefix_short_pattern + r"\* "
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[:start + 1] + match_content.group()[:-2] + expr[end:]
        match_content = re.search(search_string, expr)

    # Replace multiplication followed by space before unit short forms with only spaces if they are preceded by numbers or space
    # Example: "100* Pa" ---> "100  Pa" (the "*" turns into a second space)
    search_string = r"[0-9\(\) ]\* " + unit_short_pattern
    match_content = re.search(search_string, expr)
    while match_content is not None:
        start, end = match_content.span()
        expr = expr[:start] + match_content.group().replace("*", " ") + expr[end:]
        match_content = re.search(search_string, expr)

    return expr
575+
def transform_prefixes_to_standard(expr):
    """
    Replace alternative prefix spellings in `expr` with the standard prefix name.

    For every entry of `set_of_SI_prefixes`, each non-empty alternative
    spelling is looked for where it is not preceded by a word character and is
    followed, after optional whitespace, by an ASCII letter, 'µ' or 'Ω'. The
    spelling together with that whitespace is replaced by the standard name,
    so the name ends up directly attached to the character that follows.

    Afterwards every run of two or more whitespace characters is collapsed to
    a single space and the result is stripped at both ends.

    NOTE(review): the substitution itself inserts no space between the prefix
    name and the unit - confirm this is what the parser expects.
    """
    before_unit = r'\s*(?=[A-Za-zµΩ])'
    not_inside_word = r'(?<!\w)'

    for standard_name, _symbol, _power, spellings in set_of_SI_prefixes:
        # Empty spellings are skipped; they would match at every word start
        for spelling in filter(None, spellings):
            matcher = re.compile(not_inside_word + re.escape(spelling) + before_unit)
            expr = matcher.sub(standard_name, expr)

    # Squeeze whitespace runs down to one space and trim the ends
    squeezed = re.sub(r'\s{2,}', ' ', expr)
    return squeezed.strip()
0 commit comments