Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial offset design ideas #12015

Draft
wants to merge 14 commits into
base: develop
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ type Feature
## PRIVATE
cross join two tables.
Cross_Join
## PRIVATE
offset one or more columns
Offset

## PRIVATE
Check if a feature is supported by a dialect, and throw an error if it is not.
Expand Down
42 changes: 42 additions & 0 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ polyglot java import org.enso.table.error.ValueTypeMismatchException
polyglot java import org.enso.table.operations.OrderBuilder
polyglot java import org.enso.table.parsing.problems.ParseProblemAggregator

## Strategy used by `Column.offset` to fill the rows that are left without a source value after the column has been shifted.
type OffFill
## Fill the vacated rows with `Nothing`.
Nothing
## Fill the vacated rows with the first value of the column when the offset is negative, otherwise with the last value.
Closest_Value
## Fill the vacated rows with the values that were shifted off the other end of the column.
Wrap_Around
## Fill the vacated rows with the fixed value `c`.
Constant c

## Conversion wrapping any value in `OffFill.Constant`.
OffFill.from (that:Any) = OffFill.Constant that

type Column
## GROUP Standard.Base.Input
ICON data_input
Expand Down Expand Up @@ -2607,6 +2615,40 @@ type Column
data = Statistic.running self.to_vector statistic
Column.from_vector name data

## ALIAS shift
   GROUP Standard.Base.Values
   ICON column_add

   Returns a new column with the same name whose values are shifted by `n`
   rows. Rows that are left without a source value are filled according to
   `default`. An empty column is returned unchanged.

   Arguments:
   - n: The number of rows to offset the new column by. With a negative `n`
     each row takes the value from `n.abs` rows earlier, so the gap opens at
     the start of the column. With a positive `n` each row takes the value
     from `n` rows later, so the gap opens at the end. Defaults to -1.
   - default: How to fill the rows that have no source value:
     - `..Nothing`: fill with `Nothing` (the default).
     - `..Closest_Value`: fill with the first value of the column when `n` is
       negative, otherwise with the last value.
     - `..Wrap_Around`: values shifted off one end re-enter at the other end;
       `n` is taken modulo the column length.
     - `..Constant c`: fill with the value `c`.

   > Example
     Shift the values down by one row, filling the first row with `Nothing`.

         (Column.from_vector "A" [1, 2, 3]).offset (-1)
         # => a column "A" containing [Nothing, 1, 2]
@n (Numeric_Input display=..Always)
@default (self-> Widget_Helpers.make_fill_default_value_selector2 value_types=self.value_type)
offset : Integer -> OffFill -> Column
offset self n=-1:Integer default:OffFill=..Nothing -> Column =
    if self.length == 0 then self else
        as_vector = self.to_vector
        # At most `self.length` fill values can survive the final `take`, so
        # the fill size is capped there. This keeps the result identical while
        # avoiding a fill vector proportional to `n.abs` for very large offsets.
        adjusted_n = case default of
            OffFill.Wrap_Around -> n.abs % self.length
            _ -> n.abs.min self.length
        fill_vector = case default of
            OffFill.Nothing -> Vector.fill adjusted_n Nothing
            OffFill.Closest_Value -> if n<0 then Vector.fill adjusted_n (as_vector.at 0) else Vector.fill adjusted_n (as_vector.at (self.length-1))
            OffFill.Wrap_Around -> if n<0 then as_vector.take (..Last adjusted_n) else as_vector.take (..First adjusted_n)
            OffFill.Constant constant -> Vector.fill adjusted_n constant

        # Positive offsets append the fill and keep the tail; negative (and
        # zero) offsets prepend the fill and keep the head.
        offset_vector = if n > 0 then as_vector + fill_vector else
            fill_vector + as_vector
        resized_vector = if n > 0 then offset_vector.take (..Last self.length) else
            offset_vector.take (..First self.length)
        Column.from_vector self.name resized_vector

## PRIVATE
pretty : Text
pretty self =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
private

from Standard.Base import all
import project.Table.Table

import project.Column.Column
import project.Internal.Java_Problems
import project.Set_Mode.Set_Mode
import project.Internal.Table_Helpers
import project.Internal.Problem_Builder.Problem_Builder

polyglot java import org.enso.table.operations.Offset as Java_Offset

## PRIVATE
   Computes the offset of the column `of` by `n` rows using the Java
   `Offset.offset` operation, honouring the `group_by` and `order_by`
   selections, and stores the result in `table` under the fixed placeholder
   name "TEMP" according to `set_mode`.
table_offset_impl table:Table (of:Text|Integer=0) n:Integer=-1 (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) -> Table =
    # The result column name is currently a fixed placeholder.
    result_name = "TEMP"
    source_column = table.at of
    builder = Problem_Builder.new error_on_missing_columns=True
    group_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True builder
    resolved_ordering = Table_Helpers.resolve_order_by table.columns order_by builder
    java_source = source_column.java_column
    java_groups = group_columns.map (c-> c.java_column)
    java_ordering = resolved_ordering.map (c-> c.column.java_column)
    # One sign per ordering column, taken from the selector's sort direction.
    sort_signs = resolved_ordering.map (c-> c.associated_selector.direction.to_sign)
    shifted_storage = Java_Problems.with_problem_aggregator on_problems aggregator->
        Java_Offset.offset java_source n java_groups java_ordering sort_signs aggregator
    table.set (Column.from_storage result_name shifted_storage) result_name set_mode
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,16 @@ make_fill_default_value_selector column_source=Nothing display:Display=..Always
previous_value = [Option 'Previous Value' 'Previous_Value']
Single_Choice values=(previous_value+column_ref.values) display=display

## PRIVATE
   Builds a single-choice selector offering the `..Nothing`,
   `..Closest_Value` and `..Wrap_Around` fill options, followed by the
   options produced by `make_column_ref_by_name_selector` for the given
   `value_types`.

   If `column_source` is Nothing, `Column_Ref` options will not be added.
make_fill_default_value_selector2 column_source=Nothing display:Display=..When_Modified value_types=Value_Type.Mixed add_nothing:Boolean=False =
    included = Types_To_Include.Value (Vector.unify_vector_or_element value_types)
    value_selector = make_column_ref_by_name_selector column_source display add_text=included.text add_number=included.number add_boolean=included.boolean add_date=included.date add_time=included.time add_date_time=included.date_time add_nothing=add_nothing
    # Fixed fill strategies come first, in the same order as before.
    fill_options = [Option 'Nothing' '..Nothing', Option 'Closest Value' '..Closest_Value', Option 'Wrap Around' '..Wrap_Around']
    Single_Choice values=(fill_options+value_selector.values) display=display

## PRIVATE
Make a filter condition selector.
make_filter_condition_selector table display:Display=..Always =
Expand Down
23 changes: 23 additions & 0 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import project.Internal.Java_Problems
import project.Internal.Join_Helpers
import project.Internal.Lookup_Helpers
import project.Internal.Lookup_Helpers.Lookup_Column
import project.Internal.Offset
import project.Internal.Parse_Values_Helper
import project.Internal.Problem_Builder.Problem_Builder
import project.Internal.Read_Many_Helpers
Expand Down Expand Up @@ -3785,6 +3786,28 @@ type Table
running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
Incomparable_Values.handle_errors <| Add_Running.add_running self statistic of as set_mode group_by order_by on_problems

## ALIAS shift
   GROUP Standard.Base.Values
   ICON column_add

   Returns a new table containing a column whose values are offset by `n`
   rows.

   NOTE: this is an initial implementation. It currently always offsets the
   first column of the table (index 0); the `columns` and `default` arguments
   are accepted but not yet used.

   Arguments:
   - columns: The columns to offset. Not yet supported - the first column of
     the table is always used.
   - n: The number of rows to offset the new column by. Defaults to -1.
   - default: The value to replace missing values with. Not yet supported.
   - group_by: Columns used to split the rows into groups; passed through to
     the offset implementation.
   - order_by: Columns used to order the rows; passed through to the offset
     implementation.
   - set_mode: Controls how the resulting column is set on the table
     (defaults to `..Add`).
   - on_problems: Specifies how to handle problems reported while computing
     the offset (defaults to `..Report_Warning`).

   > Example
     TODO
@columns Widget_Helpers.make_column_name_multi_selector
@n (Widget.Numeric_Input display=..Always)
@default (self-> Widget_Helpers.make_fill_default_value_selector2)
@group_by Widget_Helpers.make_column_name_multi_selector
@order_by Widget_Helpers.make_order_by_selector
offset self columns:(Vector (Integer | Text | Regex | By_Type)) n:Integer=-1 default=Nothing (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (set_mode:Set_Mode=..Add) (on_problems:Problem_Behavior=..Report_Warning) -> Table =
    # `columns` and `default` have no counterpart in the implementation yet.
    _ = [columns, default]
    # Forward every option the implementation already understands instead of
    # silently dropping it. Error handling mirrors `running`, which also
    # orders rows by user-selected columns.
    Incomparable_Values.handle_errors <| Offset.table_offset_impl self of=0 n=n set_mode=set_mode group_by=group_by order_by=order_by on_problems=on_problems

## PRIVATE
column_naming_helper : Column_Naming_Helper
column_naming_helper self = Column_Naming_Helper.in_memory
Expand Down
115 changes: 115 additions & 0 deletions std-bits/table/src/main/java/org/enso/table/operations/Offset.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package org.enso.table.operations;

import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;
import org.enso.table.problems.ProblemAggregator;

import java.util.BitSet;
import java.util.LinkedList;
import java.util.Queue;

import org.apache.commons.math3.analysis.function.Abs;
import org.apache.poi.xssf.model.ThemesTable;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.storage.numeric.DoubleStorage;
import org.enso.table.data.column.storage.numeric.LongStorage;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.problems.ColumnAggregatedProblemAggregator;

/**
 * Computes a column whose values are shifted ("offset") by a number of rows, driven by {@link
 * RunningLooper} with the supplied grouping and ordering columns.
 */
public class Offset {
  /**
   * Builds the offset storage for {@code sourceColumn}.
   *
   * @param sourceColumn the column whose values are shifted
   * @param n the offset; a negative value makes each row take the value seen {@code |n|} rows
   *     earlier in the iteration, a non-negative value writes each row's value to the row seen
   *     {@code n} rows earlier
   * @param groupingColumns grouping columns, passed to {@code RunningLooper.loop}
   * @param orderingColumns ordering columns, passed to {@code RunningLooper.loop}
   * @param directions ordering directions, passed to {@code RunningLooper.loop}
   * @param problemAggregator aggregator receiving any reported problems
   * @return the storage holding the shifted values; rows without a source value are Nothing
   */
  public static Storage<?> offset(
      Column sourceColumn,
      int n,
      Column[] groupingColumns,
      Column[] orderingColumns,
      int[] directions,
      ProblemAggregator problemAggregator) {
    var offsetRunningStatistic = new OffsetRunningStatistic(sourceColumn, n, problemAggregator);
    RunningLooper.loop(
        groupingColumns,
        orderingColumns,
        directions,
        problemAggregator,
        offsetRunningStatistic,
        sourceColumn.getSize());
    return offsetRunningStatistic.getResult();
  }

  /** Accumulates the shifted values into a long array plus a "is Nothing" mask. */
  private static class OffsetRunningStatistic implements RunningStatistic<Long> {

    long[] result;
    BitSet isNothing;
    ColumnAggregatedProblemAggregator columnAggregatedProblemAggregator;
    Column sourceColumn;
    int n;

    OffsetRunningStatistic(Column sourceColumn, int n, ProblemAggregator problemAggregator) {
      int size = sourceColumn.getSize();
      result = new long[size];
      isNothing = new BitSet(size);
      if (n >= 0) {
        // For non-negative offsets values are written to an *earlier* row, so rows that never
        // receive a value (the tail of each group) must start out as Nothing; they are cleared
        // only when a value is actually written.
        isNothing.set(0, size);
      }
      columnAggregatedProblemAggregator = new ColumnAggregatedProblemAggregator(problemAggregator);
      this.sourceColumn = sourceColumn;
      this.n = n;
    }

    @Override
    public void calculateNextValue(int i, RunningIterator<Long> it) {
      Object value = sourceColumn.getStorage().getItemBoxed(i);
      // NOTE(review): presumably null when the value is missing or not convertible - confirm
      // against NumericConverter.
      Long dValue = NumericConverter.tryConvertingToLong(value);
      if (n < 0) {
        // The iterator hands back the value queued |n| steps ago, or null while fewer than
        // |n| + 1 values have been queued (or when the queued value itself was null).
        Long dNextValue = it.next(dValue);
        if (dNextValue == null) {
          isNothing.set(i);
        } else {
          result[i] = dNextValue;
        }
      } else {
        // The iterator hands back the row position queued n steps ago; that row receives the
        // current value.
        Long dNextPosition = it.next(Long.valueOf(i));
        if (dNextPosition != null && dValue != null) {
          // A null value is skipped (the target stays Nothing) instead of being unboxed, which
          // would throw a NullPointerException.
          int target = dNextPosition.intValue();
          result[target] = dValue;
          isNothing.clear(target);
        }
      }
    }

    @Override
    public Storage<Long> getResult() {
      return new LongStorage(result, sourceColumn.getSize(), isNothing, IntegerType.INT_64);
    }

    @Override
    public RunningIterator<Long> getNewIterator() {
      return new OffsetRunning(n);
    }
  }

  /**
   * FIFO delay line: {@link #next(Long)} returns null for the first {@code |n|} calls and
   * afterwards the element that was queued {@code |n|} calls earlier.
   */
  private static class OffsetRunning implements RunningIterator<Long> {
    // LinkedList is used deliberately: unlike ArrayDeque it accepts null elements, and null
    // source values are queued when n is negative.
    Queue<Long> queue;
    int n;
    int current_n;

    public OffsetRunning(int n) {
      this.queue = new LinkedList<>();
      this.n = n;
      this.current_n = 0;
    }

    @Override
    public Long next(Long value) {
      queue.add(value);
      current_n++;
      if (current_n > Math.abs(n)) {
        return queue.poll();
      } else {
        return null;
      }
    }

    @Override
    public Long currentValue() {
      // Oldest element still waiting in the queue, or null when the queue is empty.
      return queue.peek();
    }
  }
}
44 changes: 23 additions & 21 deletions test/Table_Tests/src/Common_Table_Operations/Main.enso
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import project.Common_Table_Operations.Map_Spec
import project.Common_Table_Operations.Map_To_Table_Spec
import project.Common_Table_Operations.Missing_Values_Spec
import project.Common_Table_Operations.Nothing_Spec
import project.Common_Table_Operations.Offset_Spec
import project.Common_Table_Operations.Order_By_Spec
import project.Common_Table_Operations.Select_Columns_Spec
import project.Common_Table_Operations.Take_Drop_Spec
Expand Down Expand Up @@ -118,36 +119,37 @@ type Test_Selection
self.run_advanced_edge_case_tests_by_default || (Environment.get "ENSO_ADVANCED_EDGE_CASE_TESTS" . is_nothing . not)

## Registers every common table operation spec with `suite_builder`, each
   exactly once, in alphabetical order.
add_specs suite_builder setup =
    Add_Group_Number_Spec.add_specs suite_builder setup
    Add_Row_Number_Spec.add_specs suite_builder setup
    Aggregate_Spec.add_specs suite_builder setup
    Coalesce_Spec.add_specs suite_builder setup
    Column_Name_Edge_Cases_Spec.add_specs suite_builder setup
    Column_Operations_Spec.add_specs suite_builder setup
    Conversion_Spec.add_specs suite_builder setup
    Core_Spec.add_specs suite_builder setup
    Cross_Join_Spec.add_specs suite_builder setup
    Cross_Tab_Spec.add_specs suite_builder setup
    Date_Time_Spec.add_specs suite_builder setup
    Derived_Columns_Spec.add_specs suite_builder setup
    Distinct_Spec.add_specs suite_builder setup
    Expression_Spec.add_specs suite_builder detailed=False setup
    Filter_Spec.add_specs suite_builder setup
    Integration_Tests.add_specs suite_builder setup
    Join_Spec.add_specs suite_builder setup
    Lookup_Spec.add_specs suite_builder setup
    Map_Spec.add_specs suite_builder setup
    Map_To_Table_Spec.add_specs suite_builder setup
    Missing_Values_Spec.add_specs suite_builder setup
    Nothing_Spec.add_specs suite_builder setup
    Offset_Spec.add_specs suite_builder setup
    Order_By_Spec.add_specs suite_builder setup
    Replace_Spec.add_specs suite_builder setup
    Select_Columns_Spec.add_specs suite_builder setup
    Take_Drop_Spec.add_specs suite_builder setup
    Temp_Column_Spec.add_specs suite_builder setup
    Text_Cleanse_Spec.add_specs suite_builder setup
    Transpose_Spec.add_specs suite_builder setup
    Union_Spec.add_specs suite_builder setup
    Zip_Spec.add_specs suite_builder setup

# Entry point: passes `add_specs` and the optional `filter` to `run_default_backend`.
main filter=Nothing = run_default_backend add_specs filter
Loading
Loading