1+ using OfficeOpenXml . FormulaParsing . Excel . Functions . MathFunctions ;
2+ using OfficeOpenXml . FormulaParsing . Excel . Functions . Metadata ;
3+ using OfficeOpenXml . FormulaParsing . Excel . Functions . RefAndLookup ;
4+ using OfficeOpenXml . FormulaParsing . FormulaExpressions ;
5+ using OfficeOpenXml . FormulaParsing . Ranges ;
6+ using OfficeOpenXml . RichData . IndexRelations ;
7+ using System ;
8+ using System . Collections . Generic ;
9+ using System . Linq ;
10+ using System . Runtime . CompilerServices ;
11+ using System . Text ;
12+ using System . Text . RegularExpressions ;
13+
14+ namespace OfficeOpenXml . FormulaParsing . Excel . Functions . Text
15+ {
[FunctionMetadata(
    Category = ExcelFunctionCategory.Text,
    EPPlusVersion = "8.6",
    Description = "Extracts text matching a regular expression pattern from input string values.",
    SupportsArrays = true)]
internal class RegexExtract : RegexFunctionBase
{
    /// <summary>
    /// Minimum number of arguments: the text and the pattern.
    /// </summary>
    public override int ArgumentMinLength => 2;

    /// <summary>
    /// Prefix reported for this function's namespace.
    /// </summary>
    public override string NamespacePrefix => "_xlfn.";

    /// <summary>
    /// Evaluates the function.
    /// Argument 0 is the text, argument 1 the pattern, optional argument 2 the return mode
    /// (0 = first match, 1 = all matches, 2 = capturing groups of the first match) and optional
    /// argument 3 the case sensitivity (0 = case sensitive, 1 = ignore case).
    /// </summary>
    /// <param name="arguments">The function arguments.</param>
    /// <param name="context">The parsing context (not read by this method).</param>
    /// <returns>
    /// When neither text nor pattern is a range: a string (mode 0), a single-row dynamic array
    /// (modes 1 and 2) or an error. Otherwise a dynamic array with one value or error per cell.
    /// </returns>
    public override CompileResult Execute(IList<FunctionArgument> arguments, ParsingContext context)
    {
        bool textIsRange = arguments[0].IsExcelRange;
        bool patternIsRange = arguments[1].IsExcelRange;
        int returnMode = arguments.Count > 2 ? ArgToInt(arguments, 2, 0) : 0;
        int caseSensitivity = arguments.Count > 3 ? ArgToInt(arguments, 3, 0) : 0;

        string[] values;
        eErrorType error;

        if (!textIsRange && !patternIsRange)
        {
            var text = arguments[0].Value?.ToString();
            var pattern = arguments[1].Value?.ToString();

            // TryExtract also converts an invalid pattern into #VALUE!, so the scalar path
            // no longer lets the ArgumentException escape from Execute (the range path
            // already handled it per cell).
            if (!TryExtract(text, pattern, returnMode, caseSensitivity, out values, out error))
            {
                return CreateResult(ExcelErrorValue.Create(error), DataType.ExcelError);
            }

            if (returnMode == 0)
            {
                return CreateResult(values[0], DataType.String);
            }

            // Modes 1 and 2 return all extracted strings as a single row.
            // NOTE(review): the cast to short wraps silently if there are more than
            // short.MaxValue values - confirm whether an upper bound should be enforced.
            var rowArray = new InMemoryRange((short)1, (short)values.Length);
            for (int i = 0; i < values.Length; i++)
            {
                rowArray.SetValue(0, i, values[i]);
            }

            return CreateDynamicArrayResult(rowArray, DataType.ExcelRange);
        }

        var texts = textIsRange ? arguments[0].ValueAsRangeInfo : null;
        var patterns = patternIsRange ? arguments[1].ValueAsRangeInfo : null;

        int textRows = texts != null ? texts.Size.NumberOfRows : 1;
        int textCols = texts != null ? texts.Size.NumberOfCols : 1;
        int patternRows = patterns != null ? patterns.Size.NumberOfRows : 1;
        int patternCols = patterns != null ? patterns.Size.NumberOfCols : 1;

        var nRows = ExpandedSize(textRows, patternRows);
        var nCols = ExpandedSize(textCols, patternCols);

        var result = new InMemoryRange(nRows, nCols);

        for (int row = 0; row < nRows; row++)
        {
            for (int col = 0; col < nCols; col++)
            {
                var textValue = GetValue(texts, arguments[0], textRows, textCols, row, col);
                var patternValue = GetValue(patterns, arguments[1], patternRows, patternCols, row, col);

                // Each cell is evaluated on its own so that one failing cell becomes an
                // error in place while the remaining cells are still calculated.
                if (TryExtract(textValue, patternValue, returnMode, caseSensitivity, out values, out error))
                {
                    // In range mode only the first match (modes 0 and 1) or the first
                    // capturing group (mode 2) is returned per cell.
                    result.SetValue(row, col, values[0]);
                }
                else
                {
                    result.SetValue(row, col, ExcelErrorValue.Create(error));
                }
            }
        }

        return CreateDynamicArrayResult(result, DataType.ExcelRange);
    }

    /// <summary>
    /// Runs the extraction for a single text/pattern pair.
    /// </summary>
    /// <param name="text">The input text; null yields <c>eErrorType.NA</c>.</param>
    /// <param name="pattern">The regular expression; null yields <c>eErrorType.NA</c>.</param>
    /// <param name="returnMode">0 = first match, 1 = all matches, 2 = capturing groups of the first match.</param>
    /// <param name="caseSensitivity">0 = case sensitive, 1 = ignore case.</param>
    /// <param name="values">On success, a non-empty array with the extracted strings; otherwise null.</param>
    /// <param name="error">On failure, the error to report; not meaningful on success.</param>
    /// <returns>True if at least one string was extracted, false if <paramref name="error"/> should be reported.</returns>
    private static bool TryExtract(string text, string pattern, int returnMode, int caseSensitivity, out string[] values, out eErrorType error)
    {
        values = null;
        error = eErrorType.NA;

        // A missing input is checked first, so it reports NA even when the mode arguments are invalid.
        if (text == null || pattern == null)
        {
            return false;
        }

        if (caseSensitivity < 0 || caseSensitivity > 1 || returnMode < 0 || returnMode > 2)
        {
            error = eErrorType.Value;
            return false;
        }

        var options = caseSensitivity == 1 ? RegexOptions.IgnoreCase : RegexOptions.None;

        try
        {
            if (returnMode == 2)
            {
                // A failed match reports Groups.Count == 1, so the number of capturing
                // groups is read from the pattern itself (GetGroupNumbers includes group 0).
                // No groups -> Value error; groups but no match -> NA error.
                var regex = new Regex(pattern, options);
                if (regex.GetGroupNumbers().Length <= 1)
                {
                    error = eErrorType.Value;
                    return false;
                }

                var match = regex.Match(text);
                if (!match.Success)
                {
                    return false;
                }

                // Skip group 0, which is the whole match.
                values = match.Groups
                    .Cast<Group>()
                    .Skip(1)
                    .Select(g => g.Value)
                    .ToArray();
            }
            else if (returnMode == 1)
            {
                var all = GetMatches(text, pattern, options);
                if (all.Length == 0)
                {
                    return false;
                }

                values = all;
            }
            else
            {
                var match = Regex.Match(text, pattern, options);
                if (!match.Success)
                {
                    return false;
                }

                values = new[] { match.Value };
            }

            return true;
        }
        catch (ArgumentException)
        {
            // The pattern could not be parsed as a regular expression.
            values = null;
            error = eErrorType.Value;
            return false;
        }
    }

    /// <summary>
    /// Returns the text of every match of <paramref name="pattern"/> in <paramref name="text"/>, in match order.
    /// </summary>
    private static string[] GetMatches(string text, string pattern, RegexOptions options)
    {
        // Fully qualified because the file also imports the RefAndLookup namespace;
        // presumably that namespace declares its own Match type - verify before shortening.
        return Regex.Matches(text, pattern, options)
            .Cast<System.Text.RegularExpressions.Match>()
            .Select(m => m.Value)
            .ToArray();
    }
}
197+ }
0 commit comments