-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathall_strip.sql
286 lines (243 loc) · 8.69 KB
/
all_strip.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
BEGIN;
DROP TABLE IF EXISTS t12 CASCADE;
DROP VIEW IF EXISTS spaces;
DROP FUNCTION IF EXISTS public.strip CASCADE;
DROP PROCEDURE IF EXISTS generic_text_trigger_transform(regclass, text[], text);
/*
Once for all, remove all the leading and trailing white spaces.
references:
https://jkorpela.fi/chars/spaces.html
https://dbfiddle.uk/IcNrq3O1
https://stackoverflow.com/questions/22699535/trim-trailing-spaces-with-postgresql/22701212#22701212
https://stackoverflow.com/questions/63302656/remove-all-unicode-space-separators-in-postgresql/63311776#63311776
https://en.wikipedia.org/wiki/Whitespace_character
https://www.postgresql.org/docs/current/functions-string.html
*/
CREATE OR REPLACE FUNCTION public.strip()
RETURNS TRIGGER
SET search_path FROM current
AS $func$
DECLARE
_sql text;
arg text;
_typ CONSTANT regtype[] := '{text,bpchar,name, varchar}';
_found bool;
white_space_class text := '[\s\u00a0\u180e\u2007\u200b-\u200f\u202f\u2060\ufeff]+';
lead_space text := '^' || white_space_class;
trail_space text := white_space_class || '$';
rec record;
argstr text[];
BEGIN
/*
TG_ARGV[].
An optional comma-separated list of arguments to be provided to the function when the trigger is executed.
The arguments are literal string constants. Simple names and numeric constants can be written here, too,
but they will all be converted to strings.
Please check the description of the implementation language of the trigger function
to find out how these arguments can be accessed within the function;
it might be different from normal function arguments.
example: https://github.com/postgres/postgres/blob/58f5edf849900bc248b7c909ca17da7287306c41/src/test/regress/expected/triggers.out#L846
*/
FOR i IN 0..TG_nargs - 1 LOOP
argstr := argstr || TG_argv[i];
END LOOP;
SELECT
INTO _sql,
_found 'select ' || string_agg(
CASE WHEN a.atttypid = ANY (_typ) AND attname = ANY (argstr) THEN
format('regexp_replace(regexp_replace(%1$s,%L,''''),%L,'''' ) as %1$s '
, a.col, lead_space, trail_space, a.col)
ELSE
col
END, ', ') || ' FROM (select ($1).*) t',
bool_or(a.atttypid = ANY (_typ))
FROM (
SELECT
a.atttypid,
quote_ident(attname) AS col,
attname
FROM
pg_catalog.pg_attribute a
WHERE
a.attrelid = TG_RELID
AND a.attnum >= 1
AND NOT a.attisdropped) a;
-- RAISE NOTICE '_sql: %', _sql;
IF _found THEN
EXECUTE _sql
USING new INTO new;
END IF;
RETURN new;
END
$func$
LANGUAGE plpgsql;
/*
generic text trigger transform.
for the specified table ($1),columns ($2)
create trigger based on $3 trigger function.
validate $3 is a trigger function not in pg_catalog, is visible to search_path
validate $1,$2 do actually exists.
*/
CREATE OR REPLACE PROCEDURE generic_text_trigger_transform(regclass, text[], text)
AS $func$
DECLARE
stmt text;
trg text;
args text;
_typ CONSTANT regtype[] := '{text,bpchar,name, varchar}';
_sql text;
BEGIN
IF $1 IS NULL OR $2 IS NULL OR $3 IS NULL THEN
RAISE EXCEPTION '$1, $2, $3 all should not be null!';
END IF;
IF NOT EXISTS(
SELECT
FROM pg_catalog.pg_proc pp
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = pp.pronamespace
WHERE pp.proname = $3
AND pg_catalog.pg_function_is_visible(pp.oid)
AND n.nspname::text <> 'pg_catalog'
AND pg_catalog.pg_get_function_result(pp.oid) = 'trigger'
)
THEN
RAISE EXCEPTION '$3 should be a function that not in pg_catalog schema and return trigger and is visible to search_path';
END IF;
IF NOT EXISTS(
SELECT
FROM pg_attribute pa
JOIN pg_class pc
ON pc.oid = pa.attrelid
WHERE pa.attrelid = $1
AND pa.attnum > 0
AND pa.attisdropped IS FALSE
AND pc.relpersistence = 'p'
AND pc.relkind in ('r','m','p') --regular table or materialized view or partitioned table.
AND pa.attgenerated = '' --should not touch generated columns
AND pa.atttypid = ANY(_typ)
HAVING array_agg(attname::text) @> $2
) THEN
RAISE EXCEPTION 'upper_this exception! because one or more of the following cases yield true:
* not all columns($2) in $1
* $1 table not exists
* $1 is not in (regular table, materialized view, partitioned table)
* $1 is not permanent table
* one of $2 is generated columns
* $2 type <> ANY{text,bpchar,name, varchar}';
END IF;
trg := $1::text || '_' ||$3 || '_' || (
SELECT
string_agg((x), '_')
FROM
unnest($2) sub (x));
IF EXISTS (
SELECT FROM pg_trigger
WHERE tgrelid = $1
AND tgname = trg) THEN
RAISE EXCEPTION 'trigger % on % already exists!',trg, $1;
END IF;
args := '(' || (
SELECT string_agg(quote_literal(x), ', ')
FROM unnest($2) sub (x)) || ')';
stmt := format('
CREATE TRIGGER %s
BEFORE INSERT OR UPDATE ON ', trg) || $1::text|| ' FOR EACH ROW '
' EXECUTE PROCEDURE ' || $3 || ' ' || args;
-- RAISE NOTICE 'stmt:%', stmt;
EXECUTE stmt;
END
$func$
LANGUAGE plpgsql;
comment on function public.strip is $$
trigger based function to strip leading and trailing white spaces.
references:
https://jkorpela.fi/chars/spaces.html
https://dbfiddle.uk/IcNrq3O1
https://stackoverflow.com/questions/22699535/trim-trailing-spaces-with-postgresql/22701212#22701212
https://stackoverflow.com/questions/63302656/remove-all-unicode-space-separators-in-postgresql/63311776#63311776
https://en.wikipedia.org/wiki/Whitespace_character
https://www.postgresql.org/docs/current/functions-string.html
$$;
comment on PROCEDURE generic_text_trigger_transform is $$
generic text trigger transform.
for the specified table ($1),columns ($2)
create trigger based on $3 trigger function.
validate $3 is a trigger function not in pg_catalog, is visible to search_path
validate $1,$2 do actually exists.
$$;
COMMIT;
------------------------------------------------------------------------
--test
--validate comment is there.
SELECT length(description) > 400 as comment_there
FROM all_comment
WHERE name = 'strip';
BEGIN;
DROP TABLE IF EXISTS t12 CASCADE;
DROP VIEW IF EXISTS spaces CASCADE;
CREATE TABLE t12 (
tid int GENERATED BY DEFAULT AS IDENTITY,
chr_d int,
in_posix_space_class bool,
src text,
src_v1 text,
src_v2 text
);
CREATE OR REPLACE VIEW spaces AS
SELECT
chr_d,
chr(chr_d) ~ '\s' AS in_posix_space_class,
chr(chr_d) || ' test my string ' || chr(chr_d) AS src,
length(chr(chr_d) || ' test my string ' || chr(chr_d)) AS src_len
FROM (
SELECT
unnest('{9,32,160,5760,6158,8239,8287,8288,12288,65279}'::int[])
UNION ALL
SELECT
generate_series(8192, 8202) AS dec -- UNICODE "Spaces"
UNION ALL
SELECT
generate_series(8203, 8207) AS dec -- First 5 space-like UNICODE "Format characters"
) t (chr_d);
COMMIT;
---- test insert on src column.
BEGIN;
call generic_text_trigger_transform('t12',array['src'],'strip');
INSERT INTO t12 (chr_d, in_posix_space_class, src)
SELECT chr_d,in_posix_space_class,src
FROM spaces;
SELECT count(*) as src_should_be_zero
FROM t12
where length(src) <> 14
and src IS NOT NULL;
ROLLBACK;
--test update on column src_v2.
BEGIN;
call generic_text_trigger_transform('t12',array['src_v2'],'strip');
INSERT INTO t12 (chr_d, in_posix_space_class, src)
SELECT chr_d, in_posix_space_class, src
FROM spaces;
UPDATE t12 SET src_v2 = src;
SELECT count(*) as src_v2_should_be_zero
FROM t12
where length(src_v2) <> 14
and src_v2 IS NOT NULL;
ROLLBACK;
--test update on column src_v1, src_v2.
BEGIN;
call generic_text_trigger_transform('t12',array['src_v1','src_v2'],'strip');
INSERT INTO t12 (chr_d, in_posix_space_class, src)
SELECT chr_d,in_posix_space_class,src
FROM spaces;
UPDATE t12 SET src_v1 = src,src_v2 = src;
SELECT count(*) as src_v1_should_be_zero
FROM t12
where length(src_v1) <> 14
and src_v1 IS NOT NULL;
SELECT count(*) as src_v2_should_be_zero
FROM t12
where length(src_v2) <> 14
and src_v2 IS NOT NULL;
ROLLBACK;
--clean up.
DROP TABLE IF EXISTS t12 CASCADE;
DROP VIEW IF EXISTS spaces;