|
| 1 | +<?php |
| 2 | + |
| 3 | +declare(strict_types=1); |
| 4 | + |
| 5 | +namespace MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core; |
| 6 | + |
| 7 | +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\AbstractMultipleColumnAnonymizer; |
| 8 | +use MakinaCorpus\DbToolsBundle\Attribute\AsAnonymizer; |
| 9 | +use MakinaCorpus\DbToolsBundle\Error\ConfigurationException; |
| 10 | +use MakinaCorpus\DbToolsBundle\Helper\FileReader; |
| 11 | + |
#[AsAnonymizer(
    name: 'file_column',
    pack: 'core',
    description: <<<TXT
    Anonymize multiple text values using a random row from the given file.
    Options are:
      - 'columns': column names that matches file columns. If you need to
        skip one of the file columns, simply set null instead of a name.
        Please remember that other option names defined here cannot be
        column names.
      - 'source': filename to load, filename must be absolute, or relative
        to the configuration file directory.
      - 'file_csv_enclosure': if file is a CSV, use this as the enclosure
        character (default is '"').
      - 'file_csv_escape': if file is a CSV, use this as the escape
        character (default is '\\').
      - 'file_csv_separator': if file is a CSV, use this as the separator
        character (default is ',').
      - 'file_skip_header': when reading any file, set this to true to skip
        the first line (default is false).
    TXT
)]
/**
 * Multiple column anonymizer whose samples are the rows of a file.
 *
 * The 'columns' option lists, in order, the column name associated to each
 * file column; a null entry marks a file column that must be skipped.
 */
class FileMultipleColumnAnonymizer extends AbstractMultipleColumnAnonymizer
{
    /**
     * Validate the 'source' and 'columns' options.
     *
     * The 'source' value is handed to FileReader::ensureFile(), and every
     * 'columns' entry must be either null or a string which is not one of
     * this anonymizer's own option names.
     *
     * @throws ConfigurationException
     *   When 'columns' is not an array, or when one of its entries has an
     *   invalid type or uses a reserved option name.
     */
    #[\Override]
    protected function validateOptions(): void
    {
        parent::validateOptions();

        // NOTE(review): presumably raises when the file cannot be used;
        // confirm against FileReader::ensureFile().
        FileReader::ensureFile($this->options->getString('source', null, true));

        $columns = $this->options->get('columns', null, true);
        if (!\is_array($columns)) {
            throw new ConfigurationException("'columns' must be an array of string or null values.");
        }

        // Option names of this anonymizer, which cannot be used as column names.
        $reservedNames = ['source', 'columns', 'file_csv_enclosure', 'file_csv_escape', 'file_csv_separator', 'file_skip_header'];

        foreach ($columns as $index => $column) {
            // Null means "skip this file column", nothing else to check.
            if (null === $column) {
                continue;
            }

            // Type check goes first, so that a non string value (such as
            // true, which loosely equals any non empty string) is reported
            // as an invalid type and not as a reserved name.
            if (!\is_string($column)) {
                throw new ConfigurationException(\sprintf("'columns' must be an array of string or null values (invalid type for column #%s).", $index));
            }

            if (\in_array($column, $reservedNames, true)) {
                throw new ConfigurationException(\sprintf("'columns' values cannot be one of ('%s') for column #%s.", \implode("', '", $reservedNames), $index));
            }
        }
    }

    /**
     * Build the column name list from the 'columns' option.
     *
     * Null entries are replaced with generated placeholder names
     * ('_ignored0', '_ignored1', ...) so that the returned list keeps one
     * name per entry of the 'columns' option.
     *
     * @return string[]
     */
    #[\Override]
    protected function getColumnNames(): array
    {
        $ret = [];

        $ignored = 0;
        foreach ($this->options->get('columns', null, true) as $name) {
            if (null === $name) {
                // It's easier to proceed this way than to strip down each
                // sample rows from the ignored columns in getSamples().
                // Even though, it would be cleaner, let's keep everything
                // simple for now.
                $ret[] = '_ignored' . ($ignored++);
            } else {
                $ret[] = $name;
            }
        }

        return $ret;
    }

    /**
     * Load all rows of the 'source' file at once.
     *
     * Rows come from FileReader::readColumnFile(), which also receives the
     * whole option set. Keys yielded by the reader are preserved, since
     * iterator_to_array() is called with its default arguments.
     */
    #[\Override]
    protected function getSamples(): array
    {
        return \iterator_to_array(
            FileReader::readColumnFile(
                $this->options->getString('source', null, true),
                $this->options,
            ),
        );
    }
}
0 commit comments