# createHashFromColumns --------------------------------------------------------

# Create one short hash per row of `data` from the values in key columns.
#
# The values of `columns` are pasted together row by row (separator "|") and
# each resulting key is hashed with digest::digest(). Only the first `nchars`
# characters of each hash are kept.
#
# Arguments:
#   data        data frame containing the key columns
#   columns     names of the columns in `data` that form the key
#   nchars      number of leading characters of each hash to keep (default: 8L)
#   silent      if TRUE, no message is shown when duplicated keys are found
#   makeUnique  if TRUE, duplicated keys do not lead to the -1L return value;
#               the (then duplicated) hashes are passed through
#               kwb.utils::makeUnique() instead. NOTE(review): makeUnique()
#               presumably appends a suffix, so such hashes may be longer than
#               `nchars` -- confirm against the kwb.utils documentation.
#
# Value:
#   Vector of hashes, one per key. If there are duplicates in the key columns
#   and `makeUnique` is FALSE: -1L with the result of
#   kwb.utils::findPartialDuplicates() stored in attribute "duplicates".
#   An error is raised if the final hashes are not unique.
createHashFromColumns <- function(
  data, columns, nchars = 8L, silent = FALSE, makeUnique = FALSE
)
{
  duplicates <- kwb.utils::findPartialDuplicates(data, columns)

  if (!is.null(duplicates)) {

    if (!silent) {
      # Emit one single message so that the text appears on one line
      message(
        "Cannot create unique hashes due to duplicates in the key columns (",
        kwb.utils::stringList(columns),
        ")! ",
        if (makeUnique) {
          "I will make the hashes unique."
        } else {
          "Returning -1L. Check attribute 'duplicates'."
        }
      )
    }

    # Without makeUnique, duplicated keys are reported via a sentinel value
    if (!makeUnique) {
      return(structure(-1L, duplicates = duplicates))
    }
  }

  keys <- kwb.utils::pasteColumns(data, columns, "|")

  # Consistency check: no duplicates were reported above, so the pasted keys
  # are expected to be unique as well
  if (!makeUnique) {
    stopifnot(!anyDuplicated(keys))
  }

  # vapply() guarantees a character vector, also for zero keys
  full_hashes <- vapply(keys, digest::digest, character(1), USE.NAMES = FALSE)

  hashes <- kwb.utils::left(full_hashes, nchars)

  if (makeUnique) {
    hashes <- kwb.utils::makeUnique(hashes, warn = FALSE)
  }

  # Different keys may collide once the hashes are truncated to nchars
  if (anyDuplicated(hashes) > 0L) {
    stop(
      "The hashes are not unique. Increase 'nchars' (currently: ", nchars,
      ") or set makeUnique = TRUE.",
      call. = FALSE
    )
  }

  hashes
}
|
0 commit comments