@@ -667,6 +667,275 @@ public DataFrame Append(IEnumerable<KeyValuePair<string, object>> row, bool inPl
667667 return ret ;
668668 }
669669
/// <summary>
/// Transforms the DataFrame from wide format to long format by unpivoting specified columns.
/// This operation takes multiple value columns and "melts" them into two columns: one containing
/// the original column names (variable) and one containing the values.
/// </summary>
/// <param name="idColumns">
/// Column names to use as identifier variables. These columns are repeated in the output
/// for each value column. Must contain at least one column name.
/// </param>
/// <param name="valueColumns">
/// Column names to unpivot into the variable and value columns. If null, all columns not
/// listed in <paramref name="idColumns"/> are used as value columns.
/// </param>
/// <param name="variableName">
/// Name for the new column that holds the original value column names. Defaults to "variable".
/// Must not be null or whitespace and must not match the name of an existing column.
/// </param>
/// <param name="valueName">
/// Name for the new column that holds the values from the unpivoted columns. Defaults to "value".
/// Must not be null or whitespace, must not match the name of an existing column, and must differ
/// from <paramref name="variableName"/>. If the value columns have different data types, this column
/// is a string column whose entries are produced with <c>ToString()</c>; otherwise it has the
/// type of the first value column.
/// </param>
/// <param name="dropNulls">
/// If true, rows where the value is null are excluded from the result.
/// Defaults to false.
/// </param>
/// <returns>
/// A new DataFrame in long format with one column per ID column, followed by the variable and value
/// columns. It has (number of original rows × number of value columns) rows, minus one row per null
/// value when <paramref name="dropNulls"/> is true.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="idColumns"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="variableName"/> or <paramref name="valueName"/> is null or whitespace,
/// when either of them matches an existing column name, when they are equal to each other,
/// when <paramref name="idColumns"/> is empty, when <paramref name="valueColumns"/> is specified
/// but empty, when any column appears in both <paramref name="idColumns"/> and
/// <paramref name="valueColumns"/>, or when a requested column does not exist in this DataFrame.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown when <paramref name="valueColumns"/> is null and there are no columns available to use as
/// value columns after excluding the ID columns.
/// </exception>
/// <example>
/// <code>
/// // Original DataFrame:
/// // | ID | Name  | 2020 | 2021 | 2022 |
/// // |----|-------|------|------|------|
/// // | 1  | Alice | 100  | 110  | 120  |
/// // | 2  | Bob   | 200  | 210  | 220  |
///
/// var melted = df.Melt(
///     idColumns: new[] { "ID", "Name" },
///     valueColumns: new[] { "2020", "2021", "2022" },
///     variableName: "Year",
///     valueName: "Sales"
/// );
///
/// // Result:
/// // | ID | Name  | Year | Sales |
/// // |----|-------|------|-------|
/// // | 1  | Alice | 2020 | 100   |
/// // | 2  | Bob   | 2020 | 200   |
/// // | 1  | Alice | 2021 | 110   |
/// // | 2  | Bob   | 2021 | 210   |
/// // | 1  | Alice | 2022 | 120   |
/// // | 2  | Bob   | 2022 | 220   |
/// </code>
/// </example>
/// <remarks>
/// The output rows are ordered by value column (all rows for the first value column,
/// then all rows for the second, etc.), preserving the source row order within each value
/// column. This is the same ordering pandas.melt() produces.
/// </remarks>
public DataFrame Melt(IEnumerable<string> idColumns, IEnumerable<string> valueColumns = null, string variableName = "variable", string valueName = "value", bool dropNulls = false)
{
    if (string.IsNullOrWhiteSpace(variableName))
    {
        throw new ArgumentException(Strings.ParameterMustNotBeNullOrWhitespace, nameof(variableName));
    }

    if (string.IsNullOrWhiteSpace(valueName))
    {
        throw new ArgumentException(Strings.ParameterMustNotBeNullOrWhitespace, nameof(valueName));
    }

    if (idColumns == null)
    {
        throw new ArgumentNullException(nameof(idColumns));
    }

    // Materialize once so a lazy sequence is not enumerated repeatedly.
    var idColumnList = idColumns.ToList();

    if (idColumnList.Count == 0)
    {
        throw new ArgumentException(Strings.MissingIdColumns, nameof(idColumns));
    }

    // A single hash set serves both the default value-column derivation and the overlap check,
    // so neither has to scan the ID list for every candidate column.
    HashSet<string> idColumnSet = [.. idColumnList];

    List<string> valueColumnList;

    if (valueColumns is null)
    {
        // Default: every column that is not an ID column, in DataFrame column order.
        valueColumnList = _columnCollection
            .Where(c => !idColumnSet.Contains(c.Name))
            .Select(c => c.Name)
            .ToList();

        if (valueColumnList.Count == 0)
        {
            throw new InvalidOperationException(Strings.NoValueColumnsRemaining);
        }
    }
    else
    {
        valueColumnList = valueColumns.ToList();

        if (valueColumnList.Count == 0)
        {
            throw new ArgumentException(Strings.MissingValueColumns, nameof(valueColumns));
        }

        if (valueColumnList.Any(idColumnSet.Contains))
        {
            throw new ArgumentException(Strings.DuplicateColumnsInIdAndValueLists, nameof(valueColumns));
        }
    }

    // The two generated columns must not clash with any existing column or with each other.
    if (_columnCollection.IndexOf(variableName) >= 0)
    {
        throw new ArgumentException(string.Format(Strings.VariableNameAlreadyExists, variableName), nameof(variableName));
    }

    if (_columnCollection.IndexOf(valueName) >= 0)
    {
        throw new ArgumentException(string.Format(Strings.ValueNameAlreadyExists, valueName), nameof(valueName));
    }

    if (string.Equals(variableName, valueName))
    {
        throw new ArgumentException(string.Format(Strings.VariableNameAndValueNameMustBeDifferent, nameof(variableName), nameof(valueName)), nameof(valueName));
    }

    EnsureColumnsExist(idColumnList, nameof(idColumns));
    EnsureColumnsExist(valueColumnList, nameof(valueColumns));

    // Size every output column up front; FillMeltedData then writes by row index.
    long totalOutputRows = CalculateTotalOutputRows(valueColumnList, dropNulls);

    var outputCols = InitializeIdColumns(idColumnList, totalOutputRows);
    var variableColumn = new StringDataFrameColumn(variableName, totalOutputRows);
    var valueColumn = CreateValueColumn(valueColumnList, valueName, totalOutputRows);

    FillMeltedData(idColumnList, valueColumnList, outputCols, variableColumn, valueColumn, dropNulls);

    outputCols.Add(variableColumn);
    outputCols.Add(valueColumn);

    return new DataFrame(outputCols);

    // Throws for the first requested name that is not a column of this DataFrame.
    void EnsureColumnsExist(List<string> columnNames, string parameterName)
    {
        foreach (var columnName in columnNames)
        {
            if (_columnCollection.IndexOf(columnName) < 0)
            {
                throw new ArgumentException(string.Format(Strings.InvalidColumnName, columnName), parameterName);
            }
        }
    }
}
835+
/// <summary>
/// Computes how many rows the melted output will contain.
/// </summary>
/// <param name="valueColumnList">Names of the columns being unpivoted.</param>
/// <param name="dropNulls">
/// When false, every source row contributes one output row per value column.
/// When true, only non-null cells of the value columns are counted.
/// </param>
/// <returns>The number of output rows to allocate.</returns>
private long CalculateTotalOutputRows(List<string> valueColumnList, bool dropNulls)
{
    if (!dropNulls)
    {
        return _rowCollection.Count * valueColumnList.Count;
    }

    long total = 0;

    foreach (var columnName in valueColumnList)
    {
        var column = _columnCollection[columnName];

        // Use the column's own null bookkeeping instead of enumerating (and boxing)
        // every cell just to test it against null.
        total += column.Length - column.NullCount;
    }

    return total;
}
860+
/// <summary>
/// Creates one output column per ID column, each resized to <paramref name="size"/> rows.
/// </summary>
/// <param name="idColumnList">Names of the ID columns, in output order.</param>
/// <param name="size">Row count passed to <c>Resize</c> for every created column.</param>
/// <returns>The new columns, in the same order as <paramref name="idColumnList"/>.</returns>
private List<DataFrameColumn> InitializeIdColumns(List<string> idColumnList, long size)
{
    // Cloning with an index column that has no entries presumably yields an empty column
    // that keeps the source column's type and name - TODO confirm against Clone's contract.
    var noRows = new PrimitiveDataFrameColumn<long>("Empty");
    var result = new List<DataFrameColumn>(idColumnList.Count);

    for (int i = 0; i < idColumnList.Count; i++)
    {
        var target = _columnCollection[idColumnList[i]].Clone(noRows);
        target.Resize(size);
        result.Add(target);
    }

    return result;
}
876+
/// <summary>
/// Creates the output column that will receive the unpivoted values.
/// </summary>
/// <param name="valueColumnList">Names of the columns being unpivoted; the first entry is used as the template when all types match.</param>
/// <param name="valueName">Name given to the created column.</param>
/// <param name="size">Row count of the created column.</param>
/// <returns>
/// A <see cref="StringDataFrameColumn"/> when the value columns report more than one distinct
/// <c>DataType</c>; otherwise a renamed, resized clone of the first value column.
/// </returns>
private DataFrameColumn CreateValueColumn(List<string> valueColumnList, string valueName, long size)
{
    int distinctTypeCount = valueColumnList
        .Select(name => _columnCollection[name].DataType)
        .Distinct()
        .Count();

    // Mixed types cannot share a typed column, so fall back to strings.
    if (distinctTypeCount > 1)
    {
        return new StringDataFrameColumn(valueName, size);
    }

    // Single type: clone the first value column with an index column that has no entries
    // (presumably producing an empty column of the same type - TODO confirm), then rename and size it.
    var noRows = new PrimitiveDataFrameColumn<long>("Empty");
    var result = _columnCollection[valueColumnList[0]].Clone(noRows);
    result.SetName(valueName);
    result.Resize(size);

    return result;
}
900+
/// <summary>
/// Copies data from this DataFrame into the output columns, one value column at a time.
/// </summary>
/// <param name="idColumnList">Names of the ID columns; index-aligned with <paramref name="outputIdCols"/>.</param>
/// <param name="valueColumnList">Names of the columns being unpivoted, in output order.</param>
/// <param name="outputIdCols">Destination ID columns. Written by row index, so they are expected to be sized by the caller already.</param>
/// <param name="variableColumn">Destination column receiving the source value column name for each output row.</param>
/// <param name="valueColumn">Destination column receiving the values. When it is a <see cref="StringDataFrameColumn"/>, values are converted with <c>ToString()</c>.</param>
/// <param name="dropNulls">When true, source cells that are null produce no output row.</param>
private void FillMeltedData(List<string> idColumnList, List<string> valueColumnList, List<DataFrameColumn> outputIdCols, StringDataFrameColumn variableColumn, DataFrameColumn valueColumn, bool dropNulls)
{
    bool stringifyValues = valueColumn is StringDataFrameColumn;
    long totalSourceRows = _rowCollection.Count;

    // Resolve the ID columns once instead of looking them up by name for every row.
    DataFrameColumn[] sourceIdColumns = idColumnList
        .Select(name => _columnCollection[name])
        .ToArray();

    long writeIndex = 0;

    foreach (var sourceName in valueColumnList)
    {
        var source = _columnCollection[sourceName];

        for (long readIndex = 0; readIndex < totalSourceRows; readIndex++)
        {
            var cell = source[readIndex];

            if (cell == null && dropNulls)
            {
                continue;
            }

            for (int k = 0; k < sourceIdColumns.Length; k++)
            {
                outputIdCols[k][writeIndex] = sourceIdColumns[k][readIndex];
            }

            variableColumn[writeIndex] = sourceName;

            if (stringifyValues)
            {
                // NOTE(review): parameterless ToString() formats numeric and date values with the
                // current thread culture; confirm whether invariant formatting is wanted here.
                valueColumn[writeIndex] = cell?.ToString();
            }
            else
            {
                valueColumn[writeIndex] = cell;
            }

            writeIndex++;
        }
    }
}
938+
670939 /// <summary>
671940 /// Invalidates any cached data after a column has changed.
672941 /// </summary>
0 commit comments