TSV String, Remove Column-Duplicate Lines

TSV String, Remove Column-Duplicate Lines

TSV 文字列, カラム重複行の削除

Extracts and deletes rows with duplicate column elements. In forward mode it keeps the first row, in reverse mode it keeps the last row. Note that “ABC” and “Abc” are recognized as different columns. (case sensitive)

Auto Step icon
Configs for this Auto Step
StrConfA1
A1: Set Input TSV text *#{EL}
BoolConfB1
B1: Forward mode or Reverse mode
StrConfB2
B2: Set Column IDs to be Evaluated (eg. “0,3”) *#{EL}
SelectConfC1
C1: Select STRING DATA for Output TSV Text (update)
SelectConfC2
C2: Select STRING DATA for Removed Lines (update)
Script (click to open)
// GraalJS Script (engine type: 2)

//////// START "main()" ////////////////////////////////////////////////////////////////
main();
function main(){ 

//// == Config Retrieving / 工程コンフィグの参照 ==
const strInput         = configs.get      ( "StrConfA1" );    /// REQUIRED
  if( strInput       === "" ){
    throw new Error( "\n AutomatedTask ConfigError:" +
                     " Config {A1: String} is empty \n" );
  }
const boolReverseMode  = configs.getObject( "BoolConfB1" );   /// TOGGLE
  // https://questetra.zendesk.com/hc/ja/articles/360024574471-R2300 "Boolean object"
  if( boolReverseMode ){ // true
    engine.log( " AutomatedTask Config: Reverse order Search");
  }else{
    engine.log( " AutomatedTask Config: Forward order Search");
  }
const strKeyColIds     = configs.get      ( "StrConfB2" );    /// REQUIRED
  if( strKeyColIds   === "" ){
    throw new Error( "\n AutomatedTask ConfigError:" +
                     " Config {B2: KeyColIds} is empty \n" );
  }
  const regNonNegativeIntCsv = /^[0-9,]*$/; // RegExp
  if( ! regNonNegativeIntCsv.test( strKeyColIds ) ){
    throw new Error( "\n AutomatedTask ConfigError:" +
                     " Config {B2: KeyColIds} allows only id numbers and commas \n" );
  }

const strPocketOutput  = configs.getObject( "SelectConfC1" ); // NotRequired
const strPocketRemoved = configs.getObject( "SelectConfC2" ); // NotRequired


//// == Data Retrieving / ワークフローデータの参照 ==
// (Nothing. Retrieved via Expression Language in Config Retrieving)


//// == Calculating / 演算 ==
const arr2dInput   = parseAsRectangular( strInput ); // creates a 2D array.
const arrKeyColIds = strKeyColIds.split( "," );

let   arrOutput    = [];
let   arrRemoved   = [];
let   setUniqKeys  = new Set();
// The Set object lets you store unique values of any type. Require ES2015(ES6)
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set
// https://developer.mozilla.org/ja/docs/Web/JavaScript/Reference/Global_Objects/Set

if ( boolReverseMode ) {
  arr2dInput.reverse(); // [ "line0", "line1", "line2" ] ⇒ [ "line2", "line1", "line0" ]
}
for ( let i = 0; i < arr2dInput.length; i++ ) {
  let strKey = "";
  for ( let j = 0; j < arrKeyColIds.length; j++ ){ // e.g.; "CellA\tCellB\tCellC"
    strKey  += arr2dInput[i][ arrKeyColIds[j] ];
    if ( j !== arrKeyColIds.length - 1 ){
      strKey += "\t";
    }
  }
  // engine.log( " AutomatedTask StrKey: " + strKey );
  if ( setUniqKeys.has ( strKey ) ) {
    // engine.log( " AutomatedTask SetUniqKeys has: " + strKey + " (line: " + i + ")" );
    arrRemoved.push ( arr2dInput[i].join( '\t' ) );
  } else {
    arrOutput.push  ( arr2dInput[i].join( '\t' ) );
    setUniqKeys.add ( strKey );
  }
}
if ( boolReverseMode ) {
  if ( arrOutput.length !== 0 ){
       arrOutput.reverse(); // [ "line2", "line1", "line0" ] ⇒ [ "line0", "line1", "line2" ]
  }
  if ( arrRemoved.length !== 0 ){
       arrRemoved.reverse();
  }
}

let strOutput  = arrOutput?.join( '\n' ) ?? "";
let strRemoved = arrRemoved?.join( '\n' ) ?? "";
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Optional_chaining


//// == Data Updating / ワークフローデータへの代入 ==
if ( strPocketOutput !== null ) {
  engine.setData ( strPocketOutput,  strOutput );
}
if ( strPocketRemoved !== null ) {
  engine.setData ( strPocketRemoved, strRemoved );
}

} //////// END "main()" /////////////////////////////////////////////////////////////////



// Parses TSV string as two-dimensional rectangular data matrix and creates a 2D array.
function parseAsRectangular( strTsv ){
  const arrTsv = strTsv.split("\n");

  /// Get numMinWidth and numMaxWidth (blank lines are excluded)
  let numMinWidth   = Infinity; // cf. String-Type Max: 1 million
  let numMaxWidth   = 0;
  let numBlanklines = 0;
  for( let i = 0; i < arrTsv.length; i++ ){
    if( arrTsv[i] === "" ){ // Skip blank lines
      numBlanklines += 1;
      continue;
    }
    let arrCells = arrTsv[i].split("\t");
    if( numMinWidth > arrCells.length ){ numMinWidth = arrCells.length; }
    if( numMaxWidth < arrCells.length ){ numMaxWidth = arrCells.length; }
  }
  engine.log( " AutomatedTask TsvDataCheck:" + 
              " MinWidth:" + numMinWidth +
              " MaxWidth:" + numMaxWidth +
              " Lines:" + arrTsv.length +
              " (BlankLines:" + numBlanklines + ")" );

  /// Get numMinWidth and numMaxWidth (blank lines are excluded)
  let arr2dTsv      = [];
  for( let i = 0; i < arrTsv.length; i++ ){
    if( arrTsv[i] === "" ){ // Skip blank lines
      continue;
    }
    let arrTmp = [];
    let arrCells = arrTsv[i].split("\t");
    for( let j = 0; j < numMaxWidth; j++ ){
      if( j < arrCells.length ){
        arrTmp[j] = arrCells[j];
      }else{
        arrTmp[j] = "";
      }
    }
    arr2dTsv.push( arrTmp );
  }

  return arr2dTsv;
}


/*
Notes:
- When the process reaches the automated step, the "TSV text" will be automatically evaluated.
    - All rows with duplicate specified columns (primary keys) are deleted.
    - In Forward mode, the first occurrence of a row is retained.
    - And the second and subsequent occurrences are deleted.
- To check for duplicates in the so-called "composite primary key," specify multiple column IDs.
    - Setting e.g.; "0,1,2", "0,2"
Appendix-en:
- If the "Output TSV Text" is empty, the string type data item is updated with empty characters.
- If the "delete line" is empty, the string type data item is updated with an empty string.
- In computer science, a set is an abstract data type:
    - that can store unique values, without any particular order.
    - It is a computer implementation of the mathematical concept of a finite set.
    - https://en.wikipedia.org/wiki/Set_(abstract_data_type)
    - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set

Notes-ja:
- 案件が自動工程に到達した際、「TSVテキスト」が評価されます。
    - 指定カラム(主キー)が重複する行が全て削除されます。
    - 正順モード(Forward mode)の場合、最初に出現した行が保持され、2回目以降に出現する行は削除されます。
- いわゆる「複合主キー」で重複を確認したい場合、カラムIDを複数指定します。
    - 設定例: "0,1,2"、"0,2"
Appendix-ja:
- もし「出力TSVテキスト」が空の場合、文字列型データ項目は、空文字で更新されます。
- もし「削除行」が空の場合、文字列型データ項目は、空文字で更新されます。
- Set とは、コンピュータプログラミングで用いられる抽象データ型の一種。
    - 順序のないデータの集まり(有限集合)を表現する抽象データ型。
    - 同一のデータは一つしか含まれないことが保証される。
    - https://w.wiki/6DCr
    - https://developer.mozilla.org/ja/docs/Web/JavaScript/Reference/Global_Objects/Set
*/

Download

warning Freely modifiable JavaScript (ECMAScript) code. No warranty of any kind.
(Installing Addon Auto-Steps are available only on the Professional edition.)

Notes

  • When the process reaches the automated step, the “TSV text” will be automatically evaluated.
    • All rows with duplicate specified columns (primary keys) are deleted.
    • In Forward mode, the first occurrence of a row is retained.
    • And the second and subsequent occurrences are deleted.
  • To check for duplicates in the so-called “composite primary key,” specify multiple column IDs.
    • Setting e.g.; “0,1,2”, “0,2”

Capture

Extracts and deletes rows with duplicate column elements. In forward mode it keeps the first row, in reverse mode it keeps the last row. Note that "ABC" and "Abc" are recognized as different columns. (case sensitive)

Appendix

See Also

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.

%d bloggers like this: