@@ -1809,6 +1809,139 @@ def scale_to_z_score_per_key(tensor, key, var_name=''):
18091809 preprocessing_fn , expected_data ,
18101810 expected_metadata )
18111811
@tft_unit.named_parameters(
    dict(
        testcase_name='_float',
        input_data=[
            {
                'x': [-4, 0],
                'key': 'a',
            },
            {
                'x': [10, 0],
                'key': 'a',
            },
            {
                'x': [2, 0],
                'key': 'a',
            },
            {
                'x': [4, 0],
                'key': 'a',
            },
            {
                'x': [1, 0],
                'key': 'b',
            },
            {
                'x': [-1, 0],
                'key': 'b',
            },
            {
                'x': [np.nan, np.nan],
                'key': 'b',
            },
        ],
        # Elementwise = True
        # Mean [a, b] = [[ 3.0, 0.0], [0.0, 0.0]]
        # Variance [a, b] = [[25.0, 0.0], [1.0, 0.0]]
        # StdDev [a, b] = [[ 5.0, 0.0], [1.0, 0.0]]
        #
        # Where the per-key standard deviation is 0 the expected scaled
        # value is 0.0 (written below as "(0 - 0) / 0"). The NaN row for
        # key 'b' is expected to come out as NaN, while the statistics
        # listed above for 'b' are those of its two finite rows.
        expected_data=[
            {
                'x_scaled': [-1.4, 0.0],  # [(-4 - 3) / 5, (0 - 0) / 0]
            },
            {
                'x_scaled': [1.4, 0.0],  # [(10 - 3) / 5, (0 - 0) / 0]
            },
            {
                'x_scaled': [-0.2, 0.0],  # [(2 - 3) / 5, (0 - 0) / 0]
            },
            {
                'x_scaled': [0.2, 0.0],  # [(4 - 3) / 5, (0 - 0) / 0]
            },
            {
                'x_scaled': [1.0, 0.0],  # [(1 - 0) / 1, (0 - 0) / 0]
            },
            {
                'x_scaled': [-1.0, 0.0],  # [(-1 - 0) / 1, (0 - 0) / 0]
            },
            {
                'x_scaled': [np.nan, np.nan],
            },
        ],
        input_metadata=tft.DatasetMetadata.from_feature_spec({
            'x': tf.io.FixedLenFeature([2], tf.float32),
            'key': tf.io.FixedLenFeature([], tf.string),
        }),
        expected_metadata=tft.DatasetMetadata.from_feature_spec({
            'x_scaled': tf.io.FixedLenFeature([2], tf.float32),
        })),
    dict(
        # Leading underscore keeps the generated test id readable and
        # consistent with the other named cases ('_float', '_dense_2d').
        testcase_name='_float_3dims',
        input_data=[
            {
                'x': [[-4, -8], [-12, -16]],
                'key': 'a',
            },
            {
                'x': [[10, 20], [30, 40]],
                'key': 'a',
            },
            {
                'x': [[2, 4], [6, 8]],
                'key': 'a',
            },
            {
                'x': [[4, 8], [12, 16]],
                'key': 'a',
            },
            {
                'x': [[1, 2], [3, 4]],
                'key': 'b',
            },
        ],
        # Each element position of key 'a' is the '_float' column
        # [-4, 10, 2, 4] multiplied by 1, 2, 3 or 4, so every position has
        # the same z-scores. Key 'b' has a single row, so its expected
        # scaled values are all 0.0.
        expected_data=[
            {
                'x_scaled': [[-1.4, -1.4], [-1.4, -1.4]],
            },
            {
                'x_scaled': [[1.4, 1.4], [1.4, 1.4]],
            },
            {
                'x_scaled': [[-0.2, -0.2], [-0.2, -0.2]],
            },
            {
                'x_scaled': [[0.2, 0.2], [0.2, 0.2]],
            },
            {
                'x_scaled': [[0.0, 0.0], [0.0, 0.0]],
            },
        ],
        input_metadata=tft.DatasetMetadata.from_feature_spec({
            'x': tf.io.FixedLenFeature([2, 2], tf.float32),
            'key': tf.io.FixedLenFeature([], tf.string),
        }),
        expected_metadata=tft.DatasetMetadata.from_feature_spec({
            'x_scaled': tf.io.FixedLenFeature([2, 2], tf.float32),
        })),
)
def testScaleToZScorePerKeyElementwise(self, input_data, expected_data,
                                       input_metadata, expected_metadata):
  """Checks `tft.scale_to_z_score_per_key` with `elementwise=True`.

  Args:
    input_data: List of instance dicts with an 'x' feature and a string 'key'.
    expected_data: List of dicts holding the expected 'x_scaled' values, in
      the same order as `input_data`.
    input_metadata: `DatasetMetadata` describing `input_data`.
    expected_metadata: `DatasetMetadata` expected for the transformed output.
  """

  def preprocessing_fn(inputs):
    outputs = {}
    outputs['x_scaled'] = tft.scale_to_z_score_per_key(
        tf.cast(inputs['x'], tf.float32),
        key=inputs['key'],
        elementwise=True,
        key_vocabulary_filename=None)
    # The scaled output must stay float32, matching the cast input.
    self.assertEqual(outputs['x_scaled'].dtype, tf.float32)
    return outputs

  self.assertAnalyzeAndTransformResults(input_data, input_metadata,
                                        preprocessing_fn, expected_data,
                                        expected_metadata)
1944+
18121945 @tft_unit .parameters (
18131946 (tf .int16 ,),
18141947 (tf .int32 ,),
@@ -1975,6 +2108,48 @@ def analyzer_fn(inputs):
19752108 expected_outputs ,
19762109 desired_batch_size = 10 )
19772110
def testMeanAndVarPerKeyElementwise(self):
  """Checks `_mean_and_var_per_key` with `reduce_instance_dims=False`.

  Feeds two-element 'x' vectors grouped under keys 'a' and 'b' and expects
  one mean and one variance per key and per element position.
  """

  def analyzer_fn(inputs):
    key_vocab, mean, var = analyzers._mean_and_var_per_key(
        inputs['x'], inputs['key'], reduce_instance_dims=False)
    return {
        'key_vocab': key_vocab,
        'mean': mean,
        # Variance is rounded to two decimal places before comparison.
        'var': tf.round(100 * var) / 100.0
    }

  input_data = [{
      'x': [-4, -1],
      'key': 'a',
  }, {
      'x': [10, 0],
      'key': 'a',
  }, {
      'x': [2, 0],
      'key': 'a',
  }, {
      'x': [4, -1],
      'key': 'a',
  }, {
      'x': [10, 0],
      'key': 'b',
  }, {
      'x': [0, 10],
      'key': 'b',
  }]
  input_metadata = tft.DatasetMetadata.from_feature_spec({
      'x': tf.io.FixedLenFeature([2], tf.float32),
      'key': tf.io.FixedLenFeature([], tf.string)
  })
  # Key 'a': columns [-4, 10, 2, 4] and [-1, 0, 0, -1]
  #   -> mean [3.0, -0.5], variance [25.0, 0.25].
  # Key 'b': columns [10, 0] and [0, 10]
  #   -> mean [5.0, 5.0], variance [25.0, 25.0].
  expected_outputs = {
      # Builtin `object` replaces the `np.object` alias, which was
      # deprecated in NumPy 1.20 and removed in NumPy 1.24.
      'key_vocab': np.array([b'a', b'b'], object),
      'mean': np.array([[3.0, -0.5], [5.0, 5.0]], np.float32),
      'var': np.array([[25.0, 0.25], [25.0, 25.0]], np.float32)
  }
  self.assertAnalyzerOutputs(input_data, input_metadata, analyzer_fn,
                             expected_outputs)
2152+
19782153 @tft_unit .named_parameters (
19792154 dict (
19802155 testcase_name = '_dense_2d' ,
0 commit comments