From 511ce3c3dbe617a46cf7e55c0ea3140b4808a69d Mon Sep 17 00:00:00 2001
From: bob
Date: Mon, 20 Aug 2018 14:18:21 -0400
Subject: [PATCH] TS-9 final change

---
 TextSearch/Common/Constants.ecl              |   2 +-
 TextSearch/Common/Default_Keywording.ecl     |   5 +-
 TextSearch/Common/FileName_Info.ecl          |  13 +-
 TextSearch/Common/FileName_Info_Instance.ecl |   6 +-
 TextSearch/Common/FileNames.ecl              |  28 +--
 TextSearch/Common/IKeywording.ecl            |   4 +-
 TextSearch/Common/Layouts.ecl                |   2 +-
 TextSearch/Common/NumericCollationFormat.ecl |   4 +-
 TextSearch/Common/Pattern_Definitions.ecl    |   3 +-
 TextSearch/Common/Types.ecl                  |  14 +-
 TextSearch/Inverted/Base_Data.ecl            |   2 +-
 TextSearch/Inverted/Basic_Key_List.ecl       |  21 --
 TextSearch/Inverted/Build_Slice_Action.ecl   |  16 +-
 TextSearch/Inverted/Layouts.ecl              |  21 +-
 TextSearch/Inverted/Manage_Superkeys.ecl     |  39 ---
 TextSearch/Inverted/ParsedText.ecl           |  86 ++++++-
 TextSearch/Inverted/RawPostings.ecl          |   8 +-
 TextSearch/Inverted/SpecialPostings.ecl      |   2 +-
 TextSearch/Inverted/check.ecl                |  54 ++++
 TextSearch/Inverted/check2.ecl               |  90 +++++++
 TextSearch/Inverted/check3.ecl               | 186 ++++++++++++++
 TextSearch/Inverted/initialism.ecl           | 108 ++++++++
 TextSearch/Inverted/john1.ecl                |  84 +++++++
 TextSearch/Inverted/john2.ecl                |  46 ++++
 TextSearch/Inverted/moby.ecl                 |  33 +++
 TextSearch/Inverted/states.ecl               | 150 +++++++++++
 TextSearch/Inverted/test_moby.ecl            |  52 ++++
 TextSearch/Inverted/try2.ecl                 | 133 ++++++++++
 TextSearch/Inverted/try4.ecl                 | 247 +++++++++++++++++++
 TextSearch/Inverted/word2vec_1.ecl           | 102 ++++++++
 TextSearch/Inverted/word2vec_2.ecl           | 108 ++++++++
 TextSearch/Inverted/word2vec_3.ecl           | 144 +++++++++++
 32 files changed, 1656 insertions(+), 157 deletions(-)
 delete mode 100644 TextSearch/Inverted/Basic_Key_List.ecl
 delete mode 100644 TextSearch/Inverted/Manage_Superkeys.ecl
 create mode 100644 TextSearch/Inverted/check.ecl
 create mode 100644 TextSearch/Inverted/check2.ecl
 create mode 100644 TextSearch/Inverted/check3.ecl
 create mode 100644 TextSearch/Inverted/initialism.ecl
 create mode 100644 TextSearch/Inverted/john1.ecl
 create mode 100644 TextSearch/Inverted/john2.ecl
 create mode 100644 TextSearch/Inverted/moby.ecl
 create mode 100644 TextSearch/Inverted/states.ecl
 create mode 100644 TextSearch/Inverted/test_moby.ecl
 create mode 100644 TextSearch/Inverted/try2.ecl
 create mode 100644 TextSearch/Inverted/try4.ecl
 create mode 100644 TextSearch/Inverted/word2vec_1.ecl
 create mode 100644 TextSearch/Inverted/word2vec_2.ecl
 create mode 100644 TextSearch/Inverted/word2vec_3.ecl

diff --git a/TextSearch/Common/Constants.ecl b/TextSearch/Common/Constants.ecl
index 18ad6b7..87d96e9 100644
--- a/TextSearch/Common/Constants.ecl
+++ b/TextSearch/Common/Constants.ecl
@@ -1,4 +1,4 @@
-EXPORT Constants := MODULE
+EXPORT Constants := MODULE
 // Limit Constants
   EXPORT Max_SearchTerms := 1000;
   EXPORT Max_Ops := 2 * Max_SearchTerms;
diff --git a/TextSearch/Common/Default_Keywording.ecl b/TextSearch/Common/Default_Keywording.ecl
index 06a0cb5..b7d483c 100644
--- a/TextSearch/Common/Default_Keywording.ecl
+++ b/TextSearch/Common/Default_Keywording.ecl
@@ -1,10 +1,11 @@
 //Default implementation. Provides minimal functionality.
-IMPORT Std.Uni;
+//Supplies the default values for the definitions declared in the IKeywording interface.
+IMPORT Std.Uni; //for case conversion (ToUpperCase below)
 IMPORT TextSearch.Common;
 IMPORT TextSearch.Common.Types;
 IMPORT TextSearch.Common.Layouts;
 TermString := Types.TermString;
-EquivTerm := Layouts.EquivTerm;
+EquivTerm := Layouts.EquivTerm; //the equivalent-term record layout
 Version := Types.Version;
 NoEquiv := DATASET([],EquivTerm);
 ToUpper := Uni.ToUpperCase;
diff --git a/TextSearch/Common/FileName_Info.ecl b/TextSearch/Common/FileName_Info.ecl
index 6455ce1..41dcde7 100644
--- a/TextSearch/Common/FileName_Info.ecl
+++ b/TextSearch/Common/FileName_Info.ecl
@@ -1,13 +1,8 @@
-//FileName Info structured used for file name generation.
-//This version includes pre-Slice management hack to support tracking update
-//versions with incremental updates.
-EXPORT FileName_Info := INTERFACE
+EXPORT FileName_Info := INTERFACE
   EXPORT STRING Prefix;
   EXPORT STRING Instance; // the version for an individual instance or the Alias
   EXPORT STRING AliasInstance := 'CURRENT';
-  EXPORT SET OF STRING AliasInstances := [AliasInstance, 'LAST', 'PAST', 'DELETED'];
-  EXPORT UNSIGNED2 Naming := 1; // version of naming system
-  EXPORT UNSIGNED2 DataVersion := 0; // placeholder for data version to build
-  EXPORT UNSIGNED1 Levels := 5;
-  EXPORT STRING UseInstance(UNSIGNED indx) := IF(indx=0, Instance, AliasInstances[indx]);
+  EXPORT UNSIGNED2 Naming := 1;
+  EXPORT UNSIGNED2 DataVersion := 0;
+  EXPORT UNSIGNED1 Levels := 5; //index levels 0 through 4
 END;
diff --git a/TextSearch/Common/FileName_Info_Instance.ecl b/TextSearch/Common/FileName_Info_Instance.ecl
index e6df211..b3638e9 100644
--- a/TextSearch/Common/FileName_Info_Instance.ecl
+++ b/TextSearch/Common/FileName_Info_Instance.ecl
@@ -1,8 +1,8 @@
-//Instance of the FileName_Info block. Used to unify the names used by TextSearch.
+//Instance of the FileName_Info block. Used to unify the names used by TextSearch.
 IMPORT TextSearch.Common;
 IMPORT STD.Str;
 Info := Common.FileName_Info;
-EXPORT FileName_Info_Instance(STRING aPre, STRING aInst) := MODULE(Info)
+EXPORT FileName_Info_Instance(STRING aPre, STRING aInst) := MODULE(Info) //inherits every field declared in FileName_Info
   STRING wPrefix := TRIM(Str.ToUpperCase(aPre),ALL);
   EXPORT STRING Prefix := IF(wPrefix<>'',
                              wPrefix,
@@ -10,5 +10,5 @@ EXPORT FileName_Info_Instance(STRING aPre, STRING aInst) := MODULE(Info)
                              Common.Constants.No_Prfx_code,
                              (STRING)Common.Constants.No_Prfx_Msg));
   STRING wInst := TRIM(Str.ToUpperCase(aInst),ALL);
-  EXPORT STRING Instance := IF(wInst<>'', wInst, AliasInstance);
+  EXPORT STRING Instance := IF(wInst<>'', wInst, AliasInstance); //falls back to AliasInstance, 'CURRENT'
 END;
\ No newline at end of file
diff --git a/TextSearch/Common/FileNames.ecl b/TextSearch/Common/FileNames.ecl
index 5705e77..a71e23f 100644
--- a/TextSearch/Common/FileNames.ecl
+++ b/TextSearch/Common/FileNames.ecl
@@ -1,4 +1,4 @@
-IMPORT TextSearch.Common;
+IMPORT TextSearch.Common;
 //Creates file names. The names are both the names of the individual
 //logical files and the container names used as aliases for a group
 //of file instances.
@@ -9,11 +9,11 @@ IMPORT TextSearch.Common;
 // Instance is FileName.Instance; and Suffix is the data type as below.
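 // For example (illustrative, assuming Prefix 'MYAPP' and the default
 // Instance 'CURRENT'), DocumentIndex(0) below yields:
 //   'MYAPP::DocSearch::Level-00::CURRENT::DocIndx'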
 FileName_Info := Common.FileName_Info;
-EXPORT FileNames(FileName_Info info, UNSIGNED Alias=0) := MODULE
+EXPORT FileNames(FileName_Info info) := MODULE //builds the logical file names for one index instance
   SHARED DocSearchPrefix := '::DocSearch::Level-';
   SHARED Name(STRING suffix, UNSIGNED lvl) := info.Prefix + DocSearchPrefix
                                             + INTFORMAT(lvl, 2, 1) + '::'
-                                            + info.UseInstance(Alias) + '::' + suffix;
+                                            + info.Instance + '::' + suffix;
 
   EXPORT DocumentIndex(UNSIGNED lvl=0) := Name('DocIndx', lvl);
   EXPORT TriGramDictionary(UNSIGNED lvl=0) := Name('TriDctIndx', lvl);
@@ -28,26 +28,4 @@ EXPORT FileNames(FileName_Info info, UNSIGNED Alias=0) := MODULE
   EXPORT TagDictionary(UNSIGNED lvl=0) := Name('TagIndx', lvl);
   EXPORT IdentIndx(UNSIGNED1 lvl=0) := Name('IdentIndx', lvl);
   EXPORT DeleteIndex(UNSIGNED1 lvl=0) := NAME('DelIndx', lvl);
-  EXPORT NameEnum := Common.Types.FileEnum;
-  EXPORT NameByEnum(NameEnum ne, UNSIGNED1 lvl=0)
-    := CASE(ne,
-            NameEnum.DocumentIndex => DocumentIndex(lvl),
-            NameEnum.TriGramDictionary => TriGramDictionary(lvl),
-            NameEnum.TermDictionary => TermDictionary(lvl),
-            NameEnum.TriGramIndex => TriGramIndex(lvl),
-            NameEnum.TermIndex => TermIndex(lvl),
-            NameEnum.PhraseIndex => PhraseIndex(lvl),
-            NameEnum.ElementIndex => ElementIndex(lvl),
-            NameEnum.AttributeIndex => AttributeIndex(lvl),
-            NameEnum.RangeIndex => RangeIndex(lvl),
-            NameEnum.NameSpaceDict => NameSpaceDict(lvl),
-            NameEnum.TagDictionary => TagDictionary(lvl),
-            NameEnum.IdentIndx => IdentIndx(lvl),
-            NameEnum.DeleteIndex => DeleteIndex(lvl),
-            Name('BadEnum', lvl));
-  // the currently building keys. Add triGramDictionary and TriGramIndex when ready
-  EXPORT NameSet := [NameEnum.DocumentIndex, NameEnum.TermDictionary, NameEnum.TermIndex,
-                     NameEnum.PhraseIndex, NameEnum.ElementIndex, NameEnum.AttributeIndex,
-                     NameEnum.RangeIndex, NameEnum.TagDictionary, NameEnum.IdentIndx,
-                     NameEnum.DeleteIndex];
 END;
\ No newline at end of file
diff --git a/TextSearch/Common/IKeywording.ecl b/TextSearch/Common/IKeywording.ecl
index 4d2a608..79ec85d 100644
--- a/TextSearch/Common/IKeywording.ecl
+++ b/TextSearch/Common/IKeywording.ecl
@@ -3,11 +3,11 @@ IMPORT TextSearch.Common.Types;
 IMPORT TextSearch.Common.Layouts;
 TermString := Types.TermString;
-EquivTerm := Layouts.EquivTerm;
+EquivTerm := Layouts.EquivTerm; //the equivalent-term record layout
 Version := Types.Version;
 EXPORT IKeywording := INTERFACE
-  EXPORT Version currentVersion;
+  EXPORT Version currentVersion; //the keywording Version this implementation provides
   EXPORT BOOLEAN hasEquivalence(TermString trm, Version v=currentVersion);
   EXPORT TermString SingleKeyword(TermString trm, Version v=currentVersion);
   EXPORT DATASET(EquivTerm) EquivKeywords(TermString trm, Version v=currentVersion);
diff --git a/TextSearch/Common/Layouts.ecl b/TextSearch/Common/Layouts.ecl
index 20a4272..f571b73 100644
--- a/TextSearch/Common/Layouts.ecl
+++ b/TextSearch/Common/Layouts.ecl
@@ -2,7 +2,7 @@ IMPORT TextSearch.Common.Types;
 IMPORT TextSearch.Inverted.Layouts AS InvertedLayouts;
 EXPORT Layouts := MODULE
-  EXPORT DocIndex := RECORD(InvertedLayouts.Document-content)
+  EXPORT DocIndex := RECORD(InvertedLayouts.Document-content) //inherits the Document layout minus its content field
 Types.KWP keywords;
     Types.Position docLength;
     STRING18 wunit;
diff --git a/TextSearch/Common/NumericCollationFormat.ecl b/TextSearch/Common/NumericCollationFormat.ecl
index e73956a..8641c51 100644
--- a/TextSearch/Common/NumericCollationFormat.ecl
+++ b/TextSearch/Common/NumericCollationFormat.ecl
@@ -1,4 +1,4 @@
-/*
+/*
 
 Layout in big endian:
 
@@ -39,7 +39,7 @@ EXPORT NumericCollationFormat := MODULE
     int expnt = 0;
     char ch = '\0';
 
-    for (int i = 0; i < (int)lenNumstr; i++)
+    for (int i = 0; i < lenNumstr; i++)
     {
       if ((ch = numstr[i]) == '.')
       {
diff --git a/TextSearch/Common/Pattern_Definitions.ecl b/TextSearch/Common/Pattern_Definitions.ecl
index 3662a91..01302be 100644
--- a/TextSearch/Common/Pattern_Definitions.ecl
+++ b/TextSearch/Common/Pattern_Definitions.ecl
@@ -1,4 +1,4 @@
-EXPORT Pattern_Definitions := MACRO
+EXPORT Pattern_Definitions := MACRO
 // Pure Whitespace
   PATTERN LowControl := PATTERN(U'[\u0001-\u0008\u000B\u000C\u000E\u000F]');
   PATTERN HighControl := PATTERN(U'[\u007F-\u009F]');
@@ -100,6 +100,7 @@ EXPORT Pattern_Definitions := MACRO
 // Composite patterns
 // Word strings
   PATTERN Letter := PATTERN(U'[[:alpha:]]');
+  //PATTERN NOLetter := PATTERN(U'[^[:alpha:]]');
   PATTERN LowerCase := PATTERN(U'[[:lower:]]');
   PATTERN UpperCase := PATTERN(U'[[:upper:]]');
   PATTERN Digit := PATTERN(U'[[:digit:]]');
diff --git a/TextSearch/Common/Types.ecl b/TextSearch/Common/Types.ecl
index 9128267..9061713 100644
--- a/TextSearch/Common/Types.ecl
+++ b/TextSearch/Common/Types.ecl
@@ -1,4 +1,4 @@
-// Types for search system
+// Types for search system
 EXPORT Types := MODULE
 
   EXPORT DocNo := UNSIGNED4;
@@ -16,8 +16,9 @@ EXPORT Types := MODULE
                           SymbolChar,  // Ampersand, Section, et cetera
                           NoiseChar,   // Noise, such as a comma or Tab
                           WhiteSpace,  // blanks
-                          SpecialStr); // special keyword string
-  EXPORT TermTypeAsString(TermType typ) := CASE(typ,
+                          SpecialStr,  // special keyword string
+                          AcroStr);    // initialisms and acronyms
+  EXPORT TermTypeAsString(TermType typ) := CASE(typ, //maps the TermType ordinal to its display string, e.g. 1 => 'Text String'
                 1 => V'Text String',
                 2 => V'Number',
                 3 => V'Date',
@@ -27,9 +28,10 @@ EXPORT Types := MODULE
                 7 => V'Noise Character',
                 8 => V'White Space',
                 9 => V'Special Keyword',
+                10 => V'Initialism and Acronyms',
                 V'Unknown');
   EXPORT KeywordTTypes := [TermType.TextStr, TermType.Number,
-                           TermType.Date, TermType.SymbolChar];
+                           TermType.Date, TermType.SymbolChar, TermType.AcroStr];
   EXPORT InvertTTypes := [TermType.TextStr, TermType.Number,
                           TermType.Date, TermType.Meta,
                           TermType.Tag, TermType.SymbolChar,
@@ -87,8 +89,4 @@ EXPORT Types := MODULE
   EXPORT DocIdentifier := UNICODE;
   EXPORT SequenceKey := STRING50;
   EXPORT SlugLine := UNICODE;
-  EXPORT FileEnum := ENUM(UNSIGNED1, Unknown=0, DocumentIndex, TriGramDictionary,
-                          TermDictionary, TriGramIndex, TermIndex, PhraseIndex,
-                          ELementIndex, AttributeIndex, RangeIndex, NameSpaceDict,
-                          TagDictionary, IdentIndx, DeleteIndex);
 END;
\ No newline at end of file
diff --git a/TextSearch/Inverted/Base_Data.ecl b/TextSearch/Inverted/Base_Data.ecl
index 69a5aac..03bed52 100644
--- a/TextSearch/Inverted/Base_Data.ecl
+++ b/TextSearch/Inverted/Base_Data.ecl
@@ -9,7 +9,7 @@ EXPORT Base_Data(Common.FileName_Info info,
                  DATASET(Inv_Layouts.DocumentIngest) docsIn):= MODULE
   // The documents must be enumerated
   SHARED keyword_mod := Common.Default_Keywording;
-  EXPORT enumDocs := Inverted.EnumeratedDocs(info, docsIn);
+  EXPORT enumDocs := Inverted.EnumeratedDocs(info, docsIn); //pipeline entry: number the ingested documents
   EXPORT rawPostings := Inverted.RawPostings(enumDocs);
   EXPORT DocIndex :=
Inverted.DocIndex(enumDocs, UNGROUP(rawPostings)); // Need to get Replaced doc list diff --git a/TextSearch/Inverted/Basic_Key_List.ecl b/TextSearch/Inverted/Basic_Key_List.ecl deleted file mode 100644 index 1104cf7..0000000 --- a/TextSearch/Inverted/Basic_Key_List.ecl +++ /dev/null @@ -1,21 +0,0 @@ -IMPORT TextSearch.Inverted.Layouts; -IMPORT TextSearch.Common; - -FileName_Info := Common.FileName_Info; -FileName_Info_Instance := Common.FileName_Info_Instance; -FileNames := Common.FileNames; -Types := Common.Types; - -EXPORT DATASET(Layouts.Managed_File_Names) Basic_Key_List(FileName_Info info) := FUNCTION - Layouts.Managed_File_Names makeEntry(Types.FileEnum name) := TRANSFORM - SELF.logical_name := FileNames(info, 0).NameByEnum(name); - SELF.current_name := FileNames(info, 1).NameByEnum(name); - SELF.previous_name := FileNames(info, 2).NameByEnum(name); - SELF.past_previous_name := FileNames(info, 3).NameByEnum(name); - SELF.deleted_name := FileNames(info, 4).NameByEnum(name); - SELF.delete_deleted := TRUE; - SELF.task := Layouts.Management_Task.Replace; - END; - ds := DATASET(COUNT(FileNames(info).NameSet), makeEntry(FileNames(info).NameSet[COUNTER])); - RETURN ds; -END; \ No newline at end of file diff --git a/TextSearch/Inverted/Build_Slice_Action.ecl b/TextSearch/Inverted/Build_Slice_Action.ecl index a9ba797..437bca9 100644 --- a/TextSearch/Inverted/Build_Slice_Action.ecl +++ b/TextSearch/Inverted/Build_Slice_Action.ecl @@ -1,14 +1,10 @@ // The action for building a slice, given the name of the Ingest file, and the //prefix and instance for the file names. -// Optional parameter is a dataset used to list other files that we want managed. IMPORT TextSearch.Common; IMPORT TextSearch.Inverted; Ingest := Inverted.Layouts.DocumentIngest; -Managed_File_Names := Inverted.Layouts.Managed_File_Names; -empty := DATASET([], Managed_File_Names); -EXPORT Build_Slice_Action(STRING ingestName, STRING prfx, STRING inst, - DATASET(Managed_File_Names) mfn=empty) := FUNCTION +EXPORT Build_Slice_Action(STRING ingestName, STRING prfx, STRING inst) := FUNCTION inDocs := DATASET(ingestName, Ingest, THOR); info := Common.FileName_Info_Instance(prfx, inst); kwm := Common.Default_Keywording; @@ -22,7 +18,7 @@ EXPORT Build_Slice_Action(STRING ingestName, STRING prfx, STRING inst, TrmDict := base.TermDict; TagDict := base.TagDict; Replaced := base.ReplacedDocs; - bc := PARALLEL( + ac := PARALLEL( BUILD(Common.Keys(info).TermIndex(TrmPosts)) ,BUILD(Common.Keys(info).ElementIndex(tagposts)) ,BUILD(Common.Keys(info).PhraseIndex(PhrsPosts)) @@ -34,13 +30,5 @@ EXPORT Build_Slice_Action(STRING ingestName, STRING prfx, STRING inst, ,BUILD(Common.Keys(info).IdentIndex(docIndx)) ,BUILD(Common.Keys(info).DeleteIndex(Replaced)) ); - Task_Enum := Inverted.Layouts.Management_Task; - good_mfn := ASSERT(mfn, (task=Task_Enum.NoOp) - OR (task=Task_Enum.Replace AND logical_name<>'' - AND current_name<>'' AND previous_name<>'' - AND past_previous_name<>'' AND deleted_name<>'' ), - 'Missing required file names for action', FAIL); - key_list := Inverted.Basic_Key_List(info) + good_mfn; - ac := SEQUENTIAL(bc, Inverted.Manage_Superkeys(info, key_list)); RETURN ac; END; \ No newline at end of file diff --git a/TextSearch/Inverted/Layouts.ecl b/TextSearch/Inverted/Layouts.ecl index 67fe79b..07c9d94 100644 --- a/TextSearch/Inverted/Layouts.ecl +++ b/TextSearch/Inverted/Layouts.ecl @@ -7,11 +7,13 @@ EXPORT Layouts := MODULE Types.SequenceKey seqKey; Types.SlugLine slugLine; UNICODE content; + UNICODE init; + //string init_w_pun; END; 
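+  //The init field added above is a working copy of content: the experimental
+  //jobs below (check*.ecl, john*.ecl, try*.ecl) use it to hold the text with
+  //dotted initialisms collapsed, e.g. 'U.S.A' -> 'USA' (illustrative example).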
 EXPORT DocumentNo := RECORD
-  Types.DocNo id;
+  Types.DocNo id; //id uses the Types.DocNo numbering type
 END;
-  EXPORT Document := RECORD(DocumentIngest)
+  EXPORT Document := RECORD(DocumentIngest) //inherits every field of DocumentIngest plus those of DocumentNo
   DocumentNo;
 END;
 // Posting Record, generated by parsing the documents.
@@ -34,16 +36,5 @@ EXPORT Layouts := MODULE
     Types.TermString tagValue;
     Types.PathString pathString;
     Types.TermString parentName;
-  END;
-  // Record for the machinery to manage file names with super keys (super files)
-  EXPORT Management_Task := ENUM(UNSIGNED1, NoOp=0, Replace); // Future
-  EXPORT Managed_File_Names := RECORD
-    STRING logical_name;
-    STRING current_name;
-    STRING previous_name;
-    STRING past_previous_name;
-    STRING deleted_name;
-    BOOLEAN delete_deleted;
-    Management_Task task;
-  END;
-END;
\ No newline at end of file
+  END;
+END;
diff --git a/TextSearch/Inverted/Manage_Superkeys.ecl b/TextSearch/Inverted/Manage_Superkeys.ecl
deleted file mode 100644
index 2b684a6..0000000
--- a/TextSearch/Inverted/Manage_Superkeys.ecl
+++ /dev/null
@@ -1,39 +0,0 @@
-// Version for pre-Slice keys.
-// Assumes that user replaces the collection.
-//
-IMPORT TextSearch.Common;
-IMPORT TextSearch.Inverted;
-IMPORT TextSeaRch.Inverted.Layouts;
-IMPORT STD.File AS FS;
-
-Managed_File_Names := Layouts.Managed_File_Names;
-Management_Task := Layouts.Management_Task;
-FileName_Info := Common.FileName_Info;
-
-
-EXPORT Manage_Superkeys(FileName_Info info, DATASET(Managed_File_Names) mfn) := FUNCTION
-  ac := SEQUENTIAL(
-    // Make sure the aliases exist, create as necessary
-    NOTHOR(APPLY(mfn,
-      IF(NOT FS.SuperFileExists(current_name), FS.CreateSuperFile(current_name))
-      ,IF(NOT FS.SuperFileExists(previous_name), FS.CreateSuperFile(previous_name))
-      ,IF(NOT FS.SuperFileExists(past_previous_name), FS.CreateSuperFile(past_previous_name))
-      ,IF(NOT FS.SuperFileExists(deleted_name), FS.CreateSuperFile(deleted_name))
-    ))
-    ,OUTPUT(mfn, NAMED('Files_List'))
-    ,FS.StartSuperFileTransaction()
-    ,NOTHOR(APPLY(mfn,
-      FS.SwapSuperFile(deleted_name, past_previous_name)
-      ,FS.SwapSuperFile(past_previous_name, previous_name)
-      ,FS.SwapSuperFile(previous_name, current_name)
-      ,FS.ClearSuperFile(current_name)
-      ,FS.AddSuperFile(current_name, logical_name)
-    ))
-    ,FS.FinishSuperFileTransaction()
-    ,NOTHOR(APPLY(mfn,
-      FS.RemoveOwnedSubFiles(deleted_name, delete_deleted)
-      ,FS.ClearSuperFile(deleted_name)
-    ))
-  );
-  RETURN ac;
-END;
\ No newline at end of file
diff --git a/TextSearch/Inverted/ParsedText.ecl b/TextSearch/Inverted/ParsedText.ecl
index 59e7d40..720e9ef 100644
--- a/TextSearch/Inverted/ParsedText.ecl
+++ b/TextSearch/Inverted/ParsedText.ecl
@@ -1,7 +1,8 @@
-// Parse contents of the document
+// Parse contents of the document
 IMPORT TextSearch;
 IMPORT TextSearch.Common;
 IMPORT TextSearch.Inverted.Layouts;
+IMPORT STD;
 Document := Layouts.Document;
 RawPosting := Layouts.RawPosting;
 Types := Common.Types;
@@ -37,11 +38,27 @@ EXPORT DATASET(RawPosting) ParsedText(DATASET(Document) docsInput) := FUNCTION
   PATTERN EmptyEnd := REPEAT(AttrListItem) OPT(Spaces) U'/>';
   PATTERN XMLElement := U'<' XMLName BEFORE ContainerEnd;
   PATTERN XMLEmpty := U'<' XMLName BEFORE EmptyEnd;
+  PATTERN expr2 := PATTERN(U'[a-zA-Z]+[.][a-zA-Z]+[.][a-zA-Z]*[.]*[a-zA-Z]*');
+  PATTERN expr3 := PATTERN(U'[a-zA-Z]+[.][a-zA-Z]+');
 
-  RULE myRule := XMLDecl OR XMLComment OR XMLElement OR XMLEmpty OR
+  PATTERN alpha := PATTERN('[A-Za-z]+');
+  PATTERN ws := [' ']*;
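+  //What the new patterns match (illustrative examples, not from the patch):
+  //  expr2: dotted initialisms with two or more dots, e.g. 'U.S.A' or 'U.S.A.F'
+  //  expr3: two-part dotted initialisms, e.g. 'U.S'
+  //  alpha/ws: a plain word and optional spaces, used in myRule below to join
+  //  a word to a following initialism.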
+  RULE myRule := expr2 ws OR alpha expr2 OR expr3 OR
+                 WordAllLower OR WordAllUpper OR WordTitleCase OR WordMixedCase OR
+                 XMLDecl OR XMLComment OR XMLElement OR XMLEmpty OR
                  AttributeExpr OR EndElement OR TagEndSeq OR
-                 WordAlphaNum OR WhiteSpace OR PoundCode OR
-                 SymbolChar OR Noise OR AnyChar OR AnyPair;
+                 WordAlphaNum OR WhiteSpace OR PoundCode OR
+                 SymbolChar OR Noise OR AnyChar OR AnyPair OR WordNoLetters;
 
   RawPosting parseString(Document doc) := TRANSFORM
     SELF.id := doc.id;;
@@ -58,13 +75,32 @@ EXPORT DATASET(RawPosting) ParsedText(DATASET(Document) docsInput) := FUNCTION
                  MATCHED(WordAlphaNum) => MATCHLENGTH(MyRule),
                  MATCHED(AnyChar) => MATCHLENGTH(MyRule),
                  MATCHED(AnyPair) => MATCHLENGTH(MyRule),
+                 MATCHED(expr2) => MATCHLENGTH(MyRule),
+                 MATCHED(expr3) => MATCHLENGTH(MyRule),
+                 MATCHED(WordAllUpper) => MATCHLENGTH(MyRule),
+                 MATCHED(WordAllLower) => MATCHLENGTH(MyRule),
+                 MATCHED(WordMixedCase) => MATCHLENGTH(MyRule),
+                 MATCHED(WordNoLetters) => MATCHLENGTH(MyRule),
+                 MATCHED(WordTitleCase) => MATCHLENGTH(MyRule),
                  0);
     SELF.keywords := MAP(
                  MATCHED(SymbolChar) => 1,
                  MATCHED(WordAlphaNum) => 1,
                  MATCHED(AnyChar) => 1,
                  MATCHED(AnyPair) => 1,
-                 0);
+                 //keyword count for a dotted initialism is its letters only: match length minus the dots
+                 MATCHED(expr2) => MATCHLENGTH(expr2) - STD.Str.FindCount((STRING)MATCHTEXT(expr2), '.'),
+                 MATCHED(expr3) => MATCHLENGTH(expr3) - STD.Str.FindCount((STRING)MATCHTEXT(expr3), '.'),
+                 MATCHED(WordAllUpper) => 1,
+                 MATCHED(WordAllLower) => 1,
+                 MATCHED(WordMixedCase) => 1,
+                 MATCHED(WordTitleCase) => 1,
+                 MATCHED(WordNoLetters) => 1,
+                 0);
     SELF.typTerm := MAP(
                  MATCHED(WhiteSpace) => Types.TermType.WhiteSpace,
                  MATCHED(SymbolChar) => Types.TermType.SymbolChar,
@@ -80,7 +116,15 @@ EXPORT DATASET(RawPosting) ParsedText(DATASET(Document) docsInput) := FUNCTION
                  MATCHED(EndElement) => Types.TermType.Tag,
                  MATCHED(TagEndSeq) => Types.TermType.Tag,
                  MATCHED(PoundCode) => Types.TermType.TextStr,
-                 Types.TermType.Unknown);
+                 MATCHED(expr2) => Types.TermType.AcroStr, //dotted initialisms get the new AcroStr term type
+                 MATCHED(expr3) => Types.TermType.AcroStr,
+                 MATCHED(WordAllUpper) => Types.TermType.TextStr,
+                 MATCHED(WordAllLower) => Types.TermType.TextStr,
+                 MATCHED(WordMixedCase) => Types.TermType.TextStr,
+                 MATCHED(WordTitleCase) => Types.TermType.TextStr,
+                 MATCHED(WordNoLetters) => Types.TermType.SymbolChar,
+                 Types.TermType.Unknown);
     SELF.typData := MAP(
                  MATCHED(WhiteSpace) => Types.DataType.RawData,
                  MATCHED(SymbolChar) => Types.DataType.RawData,
@@ -97,8 +141,17 @@ EXPORT DATASET(RawPosting) ParsedText(DATASET(Document) docsInput) := FUNCTION
                  MATCHED(EndElement) => Types.DataType.EndElement,
                  MATCHED(TagEndSeq) => Types.DataType.TagEndSeq,
                  MATCHED(PoundCode) => Types.DataType.RawData,
+                 MATCHED(expr2) => Types.DataType.RawData,
+                 MATCHED(expr3) => Types.DataType.RawData,
+                 MATCHED(WordAllUpper) => Types.DataType.RawData,
+                 MATCHED(WordAllLower) => Types.DataType.RawData,
+                 MATCHED(WordMixedCase) => Types.DataType.RawData,
+                 MATCHED(WordTitleCase) => Types.DataType.RawData,
+                 MATCHED(WordNoLetters) => Types.DataType.RawData,
                  Types.DataType.Unknown);
-    SELF.tagValue := MAP(
+    SELF.tagValue := MAP(
                  NOT MATCHED(AttributeExpr) => U'',
                  MATCHED(QuotValueWrap) => MATCHUNICODE(AnyNoQuoteStr),
                  MATCHED(AposValueWrap) => MATCHUNICODE(AnyNoAposStr),
@@ -113,10 +166,23 @@ EXPORT DATASET(RawPosting) ParsedText(DATASET(Document) docsInput) := FUNCTION
    SELF.preorder := 0;
    SELF.parentOrd :=
0;
    SELF.parentName:= U'';
-    SELF.lp := Types.LetterPattern.Unknown;
+    SELF.lp := MAP(
+                 MATCHED(WordAllUpper) => Types.LetterPattern.UpperCase,
+                 MATCHED(WordAllLower) => Types.LetterPattern.LowerCase,
+                 MATCHED(WordMixedCase) => Types.LetterPattern.MixedCase,
+                 MATCHED(WordNoLetters) => Types.LetterPattern.NoLetters,
+                 MATCHED(WordTitleCase) => Types.LetterPattern.TitleCase,
+                 Types.LetterPattern.Unknown);
+    SELF.term := MATCHUNICODE(MyRule);
   END;
   p0 := PARSE(docsInput, content, myRule, parseString(LEFT), MAX, MANY, NOT MATCHED);
-  p1 := ASSERT(p0, typTerm<>Types.TermType.Unknown, Constants.OtherCharsInText_Msg);
-  RETURN p1(typTerm <> Types.TermType.WhiteSpace);
+  //p1 := ASSERT(p0, typTerm<>Types.TermType.Unknown, Constants.OtherCharsInText_Msg); //disabled: the expanded rule now classifies every token
+  RETURN p0(typTerm <> Types.TermType.WhiteSpace);
 END;
\ No newline at end of file
diff --git a/TextSearch/Inverted/RawPostings.ecl b/TextSearch/Inverted/RawPostings.ecl
index 0ab9c1c..89b9fc9 100644
--- a/TextSearch/Inverted/RawPostings.ecl
+++ b/TextSearch/Inverted/RawPostings.ecl
@@ -1,4 +1,4 @@
-//Convert raw content into posting records
+//Convert raw content into posting records
 IMPORT TextSearch.Common;
 IMPORT TextSearch.Common.Types;
 IMPORT TextSearch.Inverted;
@@ -30,6 +30,7 @@ EXPORT GROUPED DATASET(Posting) RawPostings(DATASET(Document) docIn) := FUNCTION
     Types.TermLength lenText;
     Types.KWP keywords;
     Types.Ordinal preorder;
+
   END;
   StateRec := RECORD
     Types.Depth currDepth;
@@ -49,6 +50,7 @@ EXPORT GROUPED DATASET(Posting) RawPostings(DATASET(Document) docIn) := FUNCTION
     SELF.preorder := IF(docChanged, 0, st.lastOrd) + 1;
     SELF.lenText := st.lenText;
     SELF.keywords := st.keywords;
+
   END;
   StateRec initState() := TRANSFORM
     SELF.lastOrd := 0;
@@ -107,6 +109,7 @@ EXPORT GROUPED DATASET(Posting) RawPostings(DATASET(Document) docIn) := FUNCTION
     incrOrdinal := IF(isElement(posting.typData), 1, 0);
     closeElement := posting.typData=DataType.EndElement;
     SELF.kwp := IF(docChanged, 1, st.nextKWP);
+
     SELF.depth := IF(closeElement, st.currDepth-1, st.currDepth);
     SELF.parentOrd := toppreord;
     SELF.preorder := IF(docChanged, 0, st.lastOrd) + incrOrdinal;
@@ -114,10 +117,11 @@ EXPORT GROUPED DATASET(Posting) RawPostings(DATASET(Document) docIn) := FUNCTION
     SELF.parentName := topParentName;
     SELF.lenText := IF(closeElement, st.lenText, posting.lenText);
     SELF.keywords := IF(closeElement, st.keywords, posting.keywords);
+    SELF.typData := IF(SELF.depth>0 AND posting.typData = Types.DataType.RawData,
+                       Types.DataType.PCDATA, posting.typData); //raw text nested in an element becomes PCDATA; other types pass through
     SELF := posting;
   END;
   initalV := ROW(initState());
   p2 := PROCESS(p1, initalV, assign(LEFT,RIGHT), next(LEFT,RIGHT), LOCAL);
   p3 := GROUP(p2, id) : ONWARNING(1037, IGNORE);
   RETURN p3;
-END;
+END;
\ No newline at end of file
diff --git a/TextSearch/Inverted/SpecialPostings.ecl b/TextSearch/Inverted/SpecialPostings.ecl
index ca2ef7d..104d08a 100644
--- a/TextSearch/Inverted/SpecialPostings.ecl
+++ b/TextSearch/Inverted/SpecialPostings.ecl
@@ -1,4 +1,4 @@
-// Make the special posting records.
+// Make the special posting records.
// Right now, the only special records are the document records for //the universal document set operation EXPORT SpecialPostings(DATASET(Layouts.Posting) inp) := FUNCTION diff --git a/TextSearch/Inverted/check.ecl b/TextSearch/Inverted/check.ecl new file mode 100644 index 0000000..3724229 --- /dev/null +++ b/TextSearch/Inverted/check.ecl @@ -0,0 +1,54 @@ +//EXPORT check := 'todo'; +IMPORT TextSearch.Inverted; +IMPORT TextSearch.Common; +IMPORT STD; + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; +END; + + + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init := lr.content; + +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; + +inDocs := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +ds1 := PROJECT(inDocs, cvt(LEFT)); +OUTPUT(ENTH(ds1, 20), NAMED('Sample_20'));//will print only 20 records + +info := Common.FileName_Info_Instance(stem, instance); + +enumDocs := Inverted.EnumeratedDocs(info, ds1); +p1 := Inverted.ParsedText(enumDocs); +rawPostings := Inverted.RawPostings(enumDocs); +OUTPUT(CHOOSEN(p1,30)); + + + + + + + + + + + + + diff --git a/TextSearch/Inverted/check2.ecl b/TextSearch/Inverted/check2.ecl new file mode 100644 index 0000000..94472c2 --- /dev/null +++ b/TextSearch/Inverted/check2.ecl @@ -0,0 +1,90 @@ +IMPORT TextSearch.Inverted; +IMPORT TextSearch.Common; +IMPORT STD; +#option('outputLimit',100); + + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + set of String init; + // string init_w_pun; +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=[]; +// SELF.init_w_pun:=[]; +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; +expr:='[a-zA-Z][.][a-zA-Z]*[.][a-zA-Z]*[.]*[a-zA-Z]*'; + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); +//OUTPUT(ENTH(inDocs, 20), NAMED('Sample_20'));//will print only 20 records + +//prefix := '~thor::jdh::'; +//inputName := prefix + 'corrected_lda_ap_txtt_xml'; +//stem := prefix + 'corrected_lda_ap_txtt_xml'; +//instance := 'initial2'; + + + +//inDocs := DATASET(inputName, Inverted.Layouts.DocumentIngest, THOR); +OUTPUT(ENTH(inDocs, 20), NAMED('Sample_20'));//will print only 20 records +info := Common.FileName_Info_Instance(stem, instance); + +enumDocs := Inverted.EnumeratedDocs(info, inDocs); +rawPostings := Inverted.RawPostings(enumDocs); +OUTPUT(CHOOSEN(rawPostings,300), ALL, NAMED('First_300')); +selPostings := rawPostings(id=1 AND (start<100 OR start>3400)); +OUTPUT(selPostings, NAMED('Select_Doc_1')); +/* +t_len := TABLE(enumDocs, {id, INTEGER len:=LENGTH(CONTENT)}, id, LOCAL); +p_tab := 
TABLE(rawPostings, + {id, depth, + INTEGER kwds:=SUM(GROUP,keywords), INTEGER sum_lengths:=SUM(GROUP,lenText), + INTEGER min_kwp:=MIN(GROUP,kwp), INTEGER max_kwp:=MAX(GROUP,kwp), + INTEGER end_pos:=MAX(GROUP,stop)}, + id, depth, LOCAL); +pl_tab := JOIN(p_tab, t_len, LEFT.id=RIGHT.id, LOCAL); +OUTPUT(TOPN(pl_tab, 100, id, depth), NAMED('SUMMARY_100')); +*/ + integer i:=0; +t:=TABLE(rawPostings, + {id, term, + // INTEGER kwds:=SUM(GROUP,keywords), INTEGER sum_lengths:=SUM(GROUP,lenText), + //INTEGER min_kwp:=MIN(GROUP,kwp), INTEGER max_kwp:=MAX(GROUP,kwp), + //INTEGER end_pos:=MAX(GROUP,stop) + //String t:=term='.'; + + unicode t:=if(term='.' ,term+term[8],''); + //String t:=if(term='.' and term[2] !='',term+term[2],'') + // i:=i+1; + }, + id, term, LOCAL); + + output(t); + + + + + + + + + + + diff --git a/TextSearch/Inverted/check3.ecl b/TextSearch/Inverted/check3.ecl new file mode 100644 index 0000000..7a9b6fd --- /dev/null +++ b/TextSearch/Inverted/check3.ecl @@ -0,0 +1,186 @@ +//EXPORT try2 := 'todo'; + +//EXPORT solution := 'todo'; + +//EXPORT check := 'todo'; +IMPORT TextSearch.Inverted; +IMPORT TextSearch.Common; +IMPORT STD; +IMPORT TextSearch.Inverted.Layouts; + + + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + UNICODE init; + +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=lr.content; + +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); +OUTPUT(ENTH(inDocs, 20), NAMED('Sample_20'));//will print only 20 records + +info := Common.FileName_Info_Instance(stem, instance); + +/////////////////////////////////// +expr:=U'[a-zA-Z][.][a-zA-Z]*[.][a-zA-Z]*[.]*[a-zA-Z]*'; + + +cont:= RECORD + string term; + //inDocs.init_w_pun; +//set of string x; +END;; +Inverted.Layouts.DocumentIngest filter(Inverted.Layouts.DocumentIngest doc) := TRANSFORM +//init:=REGEXFINDSET( expr,(string)doc.content); +//SELF.content:=doc.content; +//SELF.init:=REGEXREPLACE( expr,doc.content,STD.Uni.FilterOut(doc.content, '.')); +SELF.init:=REGEXREPLACE( expr,doc.content,STD.Uni.FilterOut(doc.init, '.'));//+REGEXFINDSET(expr,doc.content); + + +//SELF.init_w_pun:=STD.Str.FilterOut((string)SELF.init, '.'); +//self.init:=STD.Str.FilterOut(REGEXFINDSET( expr,(string)doc.content), '.'); +//to change the field must use self.field +//add new column in data set and search in both +//output(init); +SELF := doc; +END; +s:= PROJECT(inDocs, filter(LEFT)); +OUTPUT(ENTH(s, 20), NAMED('Sample_200'));//will print only 20 records + +//output(s); +//output(REGEXFINDSET(expr,inDocs[1].content)); + +//////////////////////////////////// + + + + + +output(s[1].init,NAMED('Sin')); +enumDocs := Inverted.EnumeratedDocs(info, s); +p1 := Inverted.ParsedText(enumDocs); +rawPostings := Inverted.RawPostings(enumDocs); + +output(rawPostings[1]); + + +OUTPUT(inDocs,,'~ONLINE::Farah::OUT::Solution1',OVERWRITE); +OUTPUT(p1,,'~ONLINE::Farah::OUT::Solution2',OVERWRITE); +OUTPUT(rawPostings,,'~ONLINE::Farah::OUT::Solution3',OVERWRITE); + 
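+//Sketch (assumption, not in the original job): the REGEXREPLACE in filter()
+//above substitutes its replacement string for each dotted-initialism match; to
+//strip dots per match instead, collect the matches and filter each one. The
+//names hits and cleaned are hypothetical:
+//  hits := REGEXFINDSET(expr, s[1].init);
+//  cleaned := PROJECT(DATASET(hits, {UNICODE w}),
+//                     TRANSFORM({UNICODE w}, SELF.w := STD.Uni.FilterOut(LEFT.w, '.')));
+//  OUTPUT(cleaned, NAMED('Initialisms_no_dots'));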
+OUTPUT(enumDocs,,'~ONLINE::Farah::OUT::Solution4',OVERWRITE); + + + + + + + + + + + + + +//OUTPUT(rawPostings,,'~ONLINE::Farah::OUT::Solution3',OVERWRITE); + + +/* +initialism:=REGEXFINDSET(expr,(string)inDocs[1].content); +output(initialism); +A :=STD.Str.FilterOut(initialism[1], '.'); +output(A); +*/ +/* +cont filters(Inverted.RawPostings doc) := TRANSFORM + + + SELF.term:=''; +SELF := doc; +END; +r:= PROJECT(inDocs, filters(LEFT)); +output(r); + + */ + + + + + + + + + + + + + + + + + + + + + + + + + +///////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + + + + OUTPUT(CHOOSEN(rawPostings,300), ALL, NAMED('First_300')); +selPostings := rawPostings(id=1 AND (start<100 OR start>3400)); +OUTPUT(selPostings, NAMED('Select_Doc_1')); + +t_len := TABLE(enumDocs, {id, INTEGER len:=LENGTH(init)}, id, LOCAL); +p_tab := TABLE(rawPostings, + {id, depth, + INTEGER kwds:=SUM(GROUP,keywords), INTEGER sum_lengths:=SUM(GROUP,lenText), + INTEGER min_kwp:=MIN(GROUP,kwp), INTEGER max_kwp:=MAX(GROUP,kwp), + INTEGER end_pos:=MAX(GROUP,stop)}, + id, depth, LOCAL); +pl_tab := JOIN(p_tab, t_len, LEFT.id=RIGHT.id, LOCAL); +OUTPUT(TOPN(pl_tab, 100, id, depth), NAMED('SUMMARY_100')); + + + + + + diff --git a/TextSearch/Inverted/initialism.ecl b/TextSearch/Inverted/initialism.ecl new file mode 100644 index 0000000..c0019f9 --- /dev/null +++ b/TextSearch/Inverted/initialism.ecl @@ -0,0 +1,108 @@ + +IMPORT TextSearch2.Inverted; +IMPORT TextSearch2.Common; +IMPORT STD; +IMPORT TextSearch2.Inverted.Layouts; + + +#option('outputLimit',100); + + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + UNICODE init; + +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=lr.content; + +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); + +info := Common.FileName_Info_Instance(stem, instance); + + +expr:=U'[a-zA-Z][.][a-zA-Z][.]*[a-zA-Z]*[.]*[a-zA-Z]*'; +expr2:='[a-zA-Z][.][a-zA-Z][.]*[a-zA-Z]*[.]*[a-zA-Z]*'; + + + + + +enumDocs:= Inverted.EnumeratedDocs(info, inDocs); +p1 := Inverted.ParsedText(enumDocs); +rawPostings := Inverted.RawPostings(enumDocs); + +OUTPUT(rawPostings); + + +ValRec := RECORD + unicode val; +END; +DNrec := RECORD + RawPostings ; + DATASET(ValRec) Values; +END; + +DNrec filter(rawPostings L) := TRANSFORM + SetStrVals := REGEXFINDSET(expr2,(STRING)L.term)+Std.Str.SplitWords((STRING)L.term,'.'); + ValuesDS := DATASET(SetStrVals,{STRING StrVal}); + SELF.Values := PROJECT(ValuesDS, + TRANSFORM(ValRec, + SELF.val := (unicode)Left.StrVal)); + SELF:=l; + + + +END; +NestedDS := PROJECT(rawPostings,filter(LEFT)); +NestedDS; + +OutRec := RECORD + RawPostings; + unicode val; + +END; + + + +res:=NORMALIZE(NestedDS,COUNT(LEFT.Values), + TRANSFORM(OutRec, + SELF.val := 
LEFT.Values[COUNTER].val,Self.term:=LEFT.Values[COUNTER].val,SELF.len:=length(LEFT.Values[COUNTER].val),SELF.kwp:=LEFT.kwp+COUNTER,SELF.keywords:=if(length(LEFT.Values[COUNTER].val)=1,1,LEFT.keywords) + ,SELF.lentext:=length(LEFT.Values[COUNTER].val),SELF.typterm:=if(length(LEFT.Values[COUNTER].val)=1,1,LEFT.typterm)/*,SELF.lp:=if(LEFT.lp=0,,LEFT.lp)*/; + SELF := LEFT, + )); + +output(res); + + +PATTERN expr3 :=PATTERN('[a-zA-Z][.][a-zA-Z]*[.][a-zA-Z]*[.]*[a-zA-Z]*'); +PATTERN expr4 :=PATTERN('[a-zA-Z][.][a-zA-Z]*'); +PATTERN expr5 :=PATTERN('[a-zA-Z]+'); + +TOKEN JustAWord := expr3 expr5; +RULE NounPhraseComp1 := JustAWord ; +ps1 := { + +out1 := MATCHTEXT(NounPhraseComp1) }; +p14 := PARSE(res, val, NounPhraseComp1, ps1, BEST,MANY,NOCASE); +output(p14,NAMED('Result_4')); + \ No newline at end of file diff --git a/TextSearch/Inverted/john1.ecl b/TextSearch/Inverted/john1.ecl new file mode 100644 index 0000000..efad4ed --- /dev/null +++ b/TextSearch/Inverted/john1.ecl @@ -0,0 +1,84 @@ + + +IMPORT TextSearch.Inverted; +IMPORT TextSearch.Common; +IMPORT STD; +IMPORT TextSearch.Inverted.Layouts; + + + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + UNICODE init; + +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=lr.content; + +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); +OUTPUT(ENTH(inDocs, 20), NAMED('Sample_20')); + +info := Common.FileName_Info_Instance(stem, instance); + + +expr:=U'[a-zA-Z][.][a-zA-Z]*[.][a-zA-Z]*[.]*[a-zA-Z]*'; + + +cont:= RECORD + string term; +END;; +Inverted.Layouts.DocumentIngest filter(Inverted.Layouts.DocumentIngest doc) := TRANSFORM +SELF.init:=REGEXREPLACE( expr,doc.content,STD.Uni.FilterOut(doc.init, '.')); + +SELF := doc; +END; +s:= PROJECT(inDocs, filter(LEFT)); +OUTPUT(ENTH(s, 20), NAMED('Sample_200')); +output(s[1].init,NAMED('Sin')); +output(inDocs[1].content,NAMED('Con')); + +enumDocs := Inverted.EnumeratedDocs(info, s); +p1 := Inverted.ParsedText(enumDocs); +rawPostings := Inverted.RawPostings(enumDocs); + +output(rawPostings[1]); + + +OUTPUT(inDocs,,'~ONLINE::Farah::OUT::Solution1',OVERWRITE); +OUTPUT(p1,,'~ONLINE::Farah::OUT::Solution2',OVERWRITE); +OUTPUT(rawPostings,,'~ONLINE::Farah::OUT::Solution3',OVERWRITE); + +OUTPUT(enumDocs,,'~ONLINE::Farah::OUT::Solution4',OVERWRITE); + +OUTPUT(CHOOSEN(rawPostings,300), ALL, NAMED('First_300')); +selPostings := rawPostings(id=1 AND (start<100 OR start>3400)); +OUTPUT(selPostings, NAMED('Select_Doc_1')); + +t_len := TABLE(enumDocs, {id, INTEGER len:=LENGTH(init)}, id, LOCAL); +p_tab := TABLE(rawPostings, + {id, depth, + INTEGER kwds:=SUM(GROUP,keywords), INTEGER sum_lengths:=SUM(GROUP,lenText), + INTEGER min_kwp:=MIN(GROUP,kwp), INTEGER max_kwp:=MAX(GROUP,kwp), + INTEGER end_pos:=MAX(GROUP,stop)}, + id, depth, LOCAL); +pl_tab := JOIN(p_tab, t_len, LEFT.id=RIGHT.id, LOCAL); +OUTPUT(TOPN(pl_tab, 100, id, depth), NAMED('SUMMARY_100')); diff --git a/TextSearch/Inverted/john2.ecl 
b/TextSearch/Inverted/john2.ecl new file mode 100644 index 0000000..43c2d18 --- /dev/null +++ b/TextSearch/Inverted/john2.ecl @@ -0,0 +1,46 @@ +IMPORT TextSearch.Inverted; +IMPORT TextSearch.Common; +Import STD; + + prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + UNICODE init; + +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=lr.content; + +END; + + + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); + +Work2 := RECORD + Common.Types.DocIdentifier doc_ident; + UNSIGNED4 start; + UNICODE content; +END; + +Work2 splitContent(Inverted.Layouts.DocumentIngest inp, UNSIGNED sub) := TRANSFORM + SELF.doc_ident := inp.identifier; + SELF.start := ((sub-1)*100) + 1; + SELF.content := inp.content[SELF.start..SELF.start+99]; +END; + +inParts := NORMALIZE(inDocs, ((LENGTH(LEFT.content)-1)/100)+1, splitContent(LEFT, COUNTER)); + +OUTPUT(CHOOSEN(inParts, 200), ALL, NAMED('First_200_blocks')); diff --git a/TextSearch/Inverted/moby.ecl b/TextSearch/Inverted/moby.ecl new file mode 100644 index 0000000..daf00f0 --- /dev/null +++ b/TextSearch/Inverted/moby.ecl @@ -0,0 +1,33 @@ +//EXPORT moby := 'todo'; + +#option('outputLimit',100); + +import std; +CSVRecord := RECORD + string word; + +END; + + file3 := DATASET('~thor::jdh::moby', + CSVrecord, + CSV(HEADING(1), + SEPARATOR([',']), + TERMINATOR(['\n']))); + +file3; + + +cont:= RECORD + + unicode term; + set of unicode synonyms; +END; +cont filter(file3 doc) := TRANSFORM + +SELF.term:=STD.STr.SplitWords(doc.word,',')[1]; //I've got all words +SELF.synonyms:=STD.STr.SplitWords(doc.word,',')[2..];// to return set of synonyms + +SELF := doc; +END; +s:= PROJECT(file3, filter(LEFT)); +output(s); diff --git a/TextSearch/Inverted/states.ecl b/TextSearch/Inverted/states.ecl new file mode 100644 index 0000000..3fcd815 --- /dev/null +++ b/TextSearch/Inverted/states.ecl @@ -0,0 +1,150 @@ + +IMPORT TextSearch2.Inverted; +IMPORT TextSearch2.Common; +IMPORT STD; +IMPORT TextSearch2.Inverted.Layouts; +Import python; + +#option('outputLimit',100); + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + UNICODE init; + +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=lr.content; + +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); + +info := Common.FileName_Info_Instance(stem, instance); + + + +enumDocs := Inverted.EnumeratedDocs(info, inDocs); +p1 := Inverted.ParsedText(enumDocs); +rawPostings := Inverted.RawPostings(enumDocs); + 
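+//Sketch (illustrative, not in the original job): the DICTIONARY definitions
+//below give constant-time membership tests used to tag postings that are
+//state codes or names, e.g.
+//  DsDCT := DICTIONARY(Ds, {code => Ds});
+//  U'TX' IN DsDCT;  //TRUE for a state code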
+OUTPUT(rawPostings,,'~ONLINE::Farah::OUT::Solution1',OVERWRITE); + + +rec := RECORD + UNICODE code; + UNICODE state; +END; +Ds := DATASET([{'AK', 'Alaska'}, + {'AL', 'Alabama'}, + {'AR', 'Arkansas'}, + {'AS', 'American Samoa'}, + {'AZ', 'Arizona'}, + {'CA', 'California'}, + {'CO', 'Colorado'}, + {'CT', 'Connecticut'}, + {'DC', 'District of Columbia'}, + {'DE', 'Delaware'}, + {'FL', 'Florida'}, + {'GA', 'Georgia'}, + {'GU', 'Guam'}, + {'HI', 'Hawaii'}, + {'IA', 'Iowa'}, + {'ID', 'Idaho'}, + {'IL', 'Illinois'}, + {'IN', 'Indiana'}, + {'KS', 'Kansas'}, + {'KY', 'Kentucky'}, + {'LA', 'Louisiana'}, + {'MA', 'Massachusetts'}, + {'MD', 'Maryland'}, + {'ME', 'Maine'}, + {'MI', 'Michigan'}, + {'MN', 'Minnesota'}, + {'MO', 'Missouri'}, + {'MP', 'Northern Mariana Islands'}, + {'MS', 'Mississippi'}, + {'MT', 'Montana'}, + {'NA', 'National'}, + {'NC', 'North Carolina'}, + {'ND', 'North Dakota'}, + {'NE', 'Nebraska'}, + {'NH', 'New Hampshire'}, + {'NJ', 'New Jersey'}, + {'NM', 'New Mexico'}, + {'NV', 'Nevada'}, + {'NY', 'New York'}, + {'OH', 'Ohio'}, + {'OK', 'Oklahoma'}, + {'OR', 'Oregon'}, + {'PA', 'Pennsylvania'}, + {'PR', 'Puerto Rico'}, + {'RI', 'Rhode Island'}, + {'SC', 'South Carolina'}, + {'SD', 'South Dakota'}, + {'TN', 'Tennessee'}, + {'TX', 'Texas'}, + {'UT', 'Utah'}, + {'VA', 'Virginia'}, + {'VI', 'Virgin Islands'}, + {'VT', 'Vermont'}, + {'WA', 'Washington'}, + {'WI', 'Wisconsin'}, + {'WV', 'West Virginia'}, + {'WY', 'Wyoming'}],rec); + + + +DsDCT := DICTIONARY(DS,{code => DS}); +DsDCT2 := DICTIONARY(DS,{state => DS}); + + +OUTPUT(rawPostings[0].term IN DsDCT2); + +cont:= RECORD + + rawPostings.term; + +END;; +cont filter(Inverted.Layouts.RawPosting doc) := TRANSFORM + +SELF.term:=if(doc.term IN DsDCT or doc.term IN DsDCT2,doc.term,'');; + +SELF := doc; +END; +s:= PROJECT(rawPostings, filter(LEFT)); +output(s); + + +ValRec := RECORD + unicode val; +END; +DNrec := RECORD + RawPostings ; + +END; + +DNrec filter3(rawPostings L) := TRANSFORM + unicode t:=L.term; + SELF.term:=if(L.term IN DsDCT or L.term IN DsDCT2,t,L.term);; + SELF:=l; + +END; +NestedDS := PROJECT(rawPostings,filter3(LEFT)); +output(NestedDS) diff --git a/TextSearch/Inverted/test_moby.ecl b/TextSearch/Inverted/test_moby.ecl new file mode 100644 index 0000000..06bf8f6 --- /dev/null +++ b/TextSearch/Inverted/test_moby.ecl @@ -0,0 +1,52 @@ +#option('outputLimit',100); + +import std; +CSVRecord := RECORD + string word; + +END; + + file3 := DATASET('~thor::jdh::moby', + CSVrecord, + CSV(HEADING(1), + SEPARATOR([',']), + TERMINATOR(['\n']))); + +file3; + + +cont:= RECORD + + unicode term; + set of unicode synonyms; +END; +cont filter(file3 doc) := TRANSFORM + +SELF.term:=STD.STr.SplitWords(doc.word,',')[1]; //I've got all words +SELF.synonyms:=STD.STr.SplitWords(doc.word,',')[2..];// to return set of synonyms + +SELF := doc; +END; +s:= PROJECT(file3, filter(LEFT)); +//output(s); + +unicode t:='Abaddon'; +output(s); +//res:=if(s[0]=t,s[1],[]); +//output(res) + + +cont2 := RECORD + unicode term; + set of unicode synonoms; + +END; +cont2 filter2(file3 doc) := TRANSFORM + +SELF.term:=if(STD.STr.SplitWords(doc.word,',')[1]=t,STD.STr.SplitWords(doc.word,',')[1],''); + +SELF.synonoms:=if(STD.STr.SplitWords(doc.word,',')[1]=t,STD.STr.SplitWords(doc.word,',')[2..],[]); //I've got all words + +END; +s2:= PROJECT(file3, filter2(LEFT)); + output(s2); \ No newline at end of file diff --git a/TextSearch/Inverted/try2.ecl b/TextSearch/Inverted/try2.ecl new file mode 100644 index 0000000..0f93474 --- /dev/null +++ b/TextSearch/Inverted/try2.ecl @@ 
-0,0 +1,133 @@ +//EXPORT try2 := 'todo'; + +//EXPORT solution := 'todo'; + +//EXPORT check := 'todo'; +IMPORT TextSearch.Inverted; +IMPORT TextSearch.Common; +IMPORT STD; +IMPORT TextSearch.Inverted.Layouts; + +#option('outputLimit',100); + + + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + UNICODE init; + +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=lr.content; + +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); +OUTPUT(ENTH(inDocs, 20), NAMED('Sample_20'));//will print only 20 records + +info := Common.FileName_Info_Instance(stem, instance); + +/////////////////////////////////// +expr:=U'[a-zA-Z][.][a-zA-Z]*[.][a-zA-Z]*[.]*[a-zA-Z]*'; + + +cont:= RECORD + string term; + //inDocs.init_w_pun; +//set of string x; +END;; +Inverted.Layouts.DocumentIngest filter(Inverted.Layouts.DocumentIngest doc) := TRANSFORM + +//SELF.init:=REGEXREPLACE( expr,doc.content,STD.Uni.FilterOut(doc.init, '.'));//+REGEXFINDSET(expr,doc.content); + +SELF.init:=REGEXREPLACE( expr,doc.content,STD.Uni.FilterOut(doc.init, '.')); + + + + +SELF := doc; +END; +s:= PROJECT(inDocs, filter(LEFT)); +//OUTPUT(ENTH(s, 20),,'~tests' ,NAMED('Sample_200'));//will print only 20 records + +//output(s); +//output(REGEXFINDSET(expr,inDocs[1].content)); + +//////////////////////////////////// + + + + + + +output(inDocs[1].content,NAMED('Before_init')); +output(s[1].init,NAMED('After_init')); + +enumDocs := Inverted.EnumeratedDocs(info, s); +p1 := Inverted.ParsedText(enumDocs); +rawPostings := Inverted.RawPostings(enumDocs); + + + +//OUTPUT(inDocs,,'~ONLINE::Farah::OUT::Solution1',OVERWRITE); +//OUTPUT(p1,,'~ONLINE::Farah::OUT::Solution2',OVERWRITE); +//OUTPUT(rawPostings,,'~ONLINE::Farah::OUT::Solution3',OVERWRITE); + +//OUTPUT(enumDocs,,'~ONLINE::Farah::OUT::Solution4',OVERWRITE); + + + +//OUTPUT(ENTH(rawPostings[1]), NAMED('Posting'));//will print only 20 records + + +output(rawPostings,NAMED('Posting')); +output(p1,NAMED('parsed')); + + + + + + + + + +//OUTPUT(rawPostings,,'~ONLINE::Farah::OUT::Solution3',OVERWRITE); + + +/* +initialism:=REGEXFINDSET(expr,(string)inDocs[1].content); +output(initialism); +A :=STD.Str.FilterOut(initialism[1], '.'); +output(A); +*/ +/* +cont filters(Inverted.RawPostings doc) := TRANSFORM + + + SELF.term:=''; +SELF := doc; +END; +r:= PROJECT(inDocs, filters(LEFT)); +output(r); + + */ + + + \ No newline at end of file diff --git a/TextSearch/Inverted/try4.ecl b/TextSearch/Inverted/try4.ecl new file mode 100644 index 0000000..cbd5d33 --- /dev/null +++ b/TextSearch/Inverted/try4.ecl @@ -0,0 +1,247 @@ +//EXPORT try2 := 'todo'; + +//EXPORT solution := 'todo'; + +//EXPORT check := 'todo'; +IMPORT TextSearch.Inverted; +IMPORT TextSearch.Common; +IMPORT STD; +IMPORT TextSearch.Inverted.Layouts; + + + + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + 
UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + UNICODE init; + +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=lr.content; + +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); +//OUTPUT(ENTH(inDocs, 20), NAMED('Sample_20'));//will print only 20 records + +info := Common.FileName_Info_Instance(stem, instance); + +/////////////////////////////////// +expr:=U'[a-zA-Z][.][a-zA-Z][.]*[a-zA-Z]*[.]*[a-zA-Z]*'; + + + +Inverted.Layouts.RawPosting filter(Inverted.Layouts.RawPosting doc) := TRANSFORM + +//SELF.init:=REGEXREPLACE( expr,doc.content,STD.Uni.FilterOut(doc.init, '.'));//+REGEXFINDSET(expr,doc.content); + +SELF.term:=REGEXREPLACE( expr,doc.term,STD.Uni.FilterOut(doc.term, '.')); + +SELF := doc; +END; + +//OUTPUT(ENTH(s, 20),,'~tests' ,NAMED('Sample_200'));//will print only 20 records + +//output(s); +//output(REGEXFINDSET(expr,inDocs[1].content)); + +//////////////////////////////////// + +Inverted.Layouts.RawPosting filter2(Inverted.Layouts.RawPosting doc) := TRANSFORM + +//SELF.init:=REGEXREPLACE( expr,doc.content,STD.Uni.FilterOut(doc.init, '.'));//+REGEXFINDSET(expr,doc.content); + +SELF.term:=STD.Uni.FindReplace(doc.term,'.','\n'); + + + +SELF := doc; +END; + + + +Inverted.Layouts.DocumentIngest filter3(Inverted.Layouts.DocumentIngest doc) := TRANSFORM + +//SELF.init:=REGEXREPLACE( expr,doc.content,STD.Uni.FilterOut(doc.init, '.'));//+REGEXFINDSET(expr,doc.content); + +SELF.content:=STD.Uni.FindReplace(doc.content,'.',' '); + + + + +SELF := doc; +END; + + +//output(inDocs[1].content,NAMED('Before_init')); +//output(s[1].init,NAMED('After_init')); + +enumDocs := Inverted.EnumeratedDocs(info, inDocs); +p1 := Inverted.ParsedText(enumDocs); +rawPostings := Inverted.RawPostings(enumDocs); + +s:= PROJECT(rawPostings, filter(LEFT)); +s2:=PROJECT(rawPostings, filter2(LEFT)); + + + +//output(s); + +//OUTPUT(inDocs,,'~ONLINE::Farah::OUT::Solution1',OVERWRITE); +OUTPUT(rawPostings,,'~ONLINE::Farah::OUT::Solution3',OVERWRITE); +OUTPUT(s,,'~ONLINE::Farah::OUT::Solution2',OVERWRITE); +OUTPUT(s2,,'~ONLINE::Farah::OUT::Solution4',OVERWRITE); + + +enum2:=PROJECT(inDocs, filter3(LEFT)); +OUTPUT(enum2[1].content,named('farah')); + +enumDocs2 := Inverted.EnumeratedDocs(info, enum2); +//p11 := Inverted.ParsedText(enumDocs2); +rawPostings2 := Inverted.RawPostings(enumDocs2); +OUTPUT(rawPostings2,,'~ONLINE::Farah::OUT::Solution7',OVERWRITE); + + +//OUTPUT(enumDocs,,'~ONLINE::Farah::OUT::Solution4',OVERWRITE); + + + +//OUTPUT(ENTH(rawPostings[1]), NAMED('Posting'));//will print only 20 records + + +//output(rawPostings,NAMED('Posting')); +//output(p1,NAMED('parsed')); + + + + + + + + + +//OUTPUT(rawPostings,,'~ONLINE::Farah::OUT::Solution3',OVERWRITE); + + +/* +initialism:=REGEXFINDSET(expr,(string)inDocs[1].content); +output(initialism); +A :=STD.Str.FilterOut(initialism[1], '.'); +output(A); +*/ +/* +cont filters(Inverted.RawPostings doc) := TRANSFORM + + + SELF.term:=''; +SELF := doc; +END; +r:= PROJECT(inDocs, filters(LEFT)); +output(r); + + */ +e:=REGEXREPLACE( expr,inDocs[1].content ,STD.Uni.FilterOut(inDocs[1].content, '.')); + +output(e); + + + +ds := DATASET([{'thee is anew A.B.C and 
V.R'}], {STRING100 line}); + + +PATTERN expr2 :=PATTERN(U'[a-zA-Z][.][a-zA-Z]*[.][a-zA-Z]*[.]*[a-zA-Z]*'); + + +PATTERN ws := PATTERN('[ \t\r\n]'); + + + + + +PATTERN Alpha := PATTERN('[A-Za-z]'); + + + +PATTERN Word := Alpha+; + + + +PATTERN Article := ['the', 'A']; + + + +TOKEN JustAWord := expr2 ; + + + +PATTERN notHen := VALIDATE(Word, MATCHTEXT != 'hen'); + + + +TOKEN NoHenWord := notHen ; + + + +RULE NounPhraseComp1 := JustAWord ; + +RULE NounPhraseComp2 := NoHenWord | Article ws Word; +//RULE Noun3 := NounPhraseComp1 , NounPhraseComp2; + + +ps1 := { + + + +out1 := MATCHTEXT(NounPhraseComp1) }; + +ps2 := { + +out2 := MATCHTEXT(NounPhraseComp2) }; + +//ps3 := { + + + +//out3 := MATCHTEXT(Noun3) }; + + + +p11 := PARSE(ds, line, NounPhraseComp1, ps1, BEST,MANY,NOCASE); + +p22 := PARSE(ds, line, NounPhraseComp2, ps2, BEST,MANY,NOCASE); +//p33 := PARSE(ds, line, Noun3, ps3, BEST,MANY,NOCASE); + +output(p11); +output(p22); +//output(p33); + + p111 := PARSE(inDocs, content, NounPhraseComp1, ps1, BEST,MANY,NOCASE); + output(p111); + //pr := Inverted.ParsedText(p111); +//sss:=REGEXREPLACE( expr,p111[1],STD.Uni.FilterOut(doc.init, '.')); +//output(p111); + p222 := PARSE(inDocs, content, NounPhraseComp2, ps2, BEST,MANY,NOCASE); + output(p222); + output(p111+p222); + + + + + \ No newline at end of file diff --git a/TextSearch/Inverted/word2vec_1.ecl b/TextSearch/Inverted/word2vec_1.ecl new file mode 100644 index 0000000..7e6cf49 --- /dev/null +++ b/TextSearch/Inverted/word2vec_1.ecl @@ -0,0 +1,102 @@ + +IMPORT Python; +#option('outputLimit',100); + + +namerec := RECORD + string name; +END; + + + + + +IMPORT TextSearch2.Inverted; +IMPORT TextSearch2.Common; +IMPORT STD; +IMPORT TextSearch2.Inverted.Layouts; + + + + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + UNICODE init; + +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=lr.content; + +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); + +info := Common.FileName_Info_Instance(stem, instance); + + +expr:=U'[a-zA-Z][.][a-zA-Z][.]*[a-zA-Z]*[.]*[a-zA-Z]*'; +expr2:='[a-zA-Z][.][a-zA-Z][.]*[a-zA-Z]*[.]*[a-zA-Z]*'; + + + + + +enumDocs := Inverted.EnumeratedDocs(info, inDocs); +p1 := Inverted.ParsedText(enumDocs); +rawPostings := Inverted.RawPostings(enumDocs); + +OUTPUT(enumDocs,,'~ONLINE::Farah::OUT::Solution77',OVERWRITE); + +rec0 := RECORD + unicode cell; +END; + +rec := RECORD +DATASET(rec0) arow; +END; + + + + +import python; +DATASET(rec0) word2vec(dataset(Inverted.Layouts.DocumentIngest) A) := embed(Python) + + + import numpy as np + import re + import gensim + + s=[] + for n in A: + s.append(gensim.utils.simple_preprocess(unicode(n.content))) + model = gensim.models.Word2Vec(s,size=150,window=10,min_count=2,workers=10) + model.train(s,total_examples=len(s),epochs=10) + w1 = "school" + r= model.wv.most_similar(positive=w1) + return r + +endembed; + + + + + + + OUTPUT(CHOOSEN(word2vec(inDocs), 200), ALL, NAMED('First_200_blocks')); diff 
--git a/TextSearch/Inverted/word2vec_2.ecl b/TextSearch/Inverted/word2vec_2.ecl new file mode 100644 index 0000000..a19258e --- /dev/null +++ b/TextSearch/Inverted/word2vec_2.ecl @@ -0,0 +1,108 @@ + +IMPORT Python; +#option('outputLimit',100); + +namerec := RECORD + string name; +END; + + + + + +IMPORT TextSearch2.Inverted; +IMPORT TextSearch2.Common; +IMPORT STD; +IMPORT TextSearch2.Inverted.Layouts; + + + + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + UNICODE init; + +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=lr.content; + +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); + +info := Common.FileName_Info_Instance(stem, instance); + + +expr:=U'[a-zA-Z][.][a-zA-Z][.]*[a-zA-Z]*[.]*[a-zA-Z]*'; +expr2:='[a-zA-Z][.][a-zA-Z][.]*[a-zA-Z]*[.]*[a-zA-Z]*'; + + + + + +enumDocs := Inverted.EnumeratedDocs(info, inDocs); +p1 := Inverted.ParsedText(enumDocs); +rawPostings := Inverted.RawPostings(enumDocs); + +OUTPUT(enumDocs,,'~ONLINE::Farah::OUT::Solution77',OVERWRITE); + +rec0 := RECORD + set of unicode cell; +END; + +rec := RECORD +DATASET(rec0) arow; +END; + + + + +import python; +DATASET(rec0) word2vec(dataset(Inverted.Layouts.DocumentIngest) A, unicode word) := embed(Python) + + + import numpy as np + import re + import gensim + + s=[] + for n in A: + s.append(gensim.utils.simple_preprocess(unicode(n.content))) + model = gensim.models.Word2Vec(s,size=150,window=10,min_count=2,workers=10) + model.train(s,total_examples=len(s),epochs=10) + w1 =word.split() + r=[] + for i in w1: + r.append([i,unicode(model.wv.most_similar(positive=(i)))]) + return r + +endembed; + + + + + query:=u'students in school' ; + + +res:=word2vec(inDocs,query); +Output(res); + + + diff --git a/TextSearch/Inverted/word2vec_3.ecl b/TextSearch/Inverted/word2vec_3.ecl new file mode 100644 index 0000000..2616f23 --- /dev/null +++ b/TextSearch/Inverted/word2vec_3.ecl @@ -0,0 +1,144 @@ + +IMPORT Python; +#option('outputLimit',100); + +namerec := RECORD + string name; +END; + + + + + +IMPORT TextSearch2.Inverted; +IMPORT TextSearch2.Common; +IMPORT STD; +IMPORT TextSearch2.Inverted.Layouts; + + + + +prefix := '~thor::jdh::'; +inputName := prefix + 'corrected_lda_ap_txtt_xml'; + +Work1 := RECORD + UNICODE doc_number{XPATH('/DOC/DOCNO')}; + UNICODE content{MAXLENGTH(32000000),XPATH('<>')}; + UNICODE text{MAXLENGTH(32000000),XPATH('/DOC/TEXT')}; + UNSIGNED8 file_pos{VIRTUAL(fileposition)}; + UNICODE init; + +END; + + +Inverted.Layouts.DocumentIngest cvt(Work1 lr) := TRANSFORM + SELF.identifier := TRIM(lr.doc_number, LEFT,RIGHT); + SELF.seqKey := inputName + '-' + INTFORMAT(lr.file_pos,12,1); + SELF.slugLine := lr.text[1..STD.Uni.Find(lr.text,'.',1)+1]; + SELF.content := lr.content; + SELF.init:=lr.content; + +END; + + +stem := prefix + 'corrected_lda_ap_txtt_xml'; +instance := 'initial2'; + +ds0 := DATASET(inputName, Work1, XML('/DOC', NOROOT)); +inDocs := PROJECT(ds0, cvt(LEFT)); + +info := 
Common.FileName_Info_Instance(stem, instance); + + +expr:=U'[a-zA-Z][.][a-zA-Z][.]*[a-zA-Z]*[.]*[a-zA-Z]*'; +expr2:='[a-zA-Z][.][a-zA-Z][.]*[a-zA-Z]*[.]*[a-zA-Z]*'; + + + + + +OUTPUT(inDocs); + + + + +rec0 := RECORD + unicode cell; +END; + +rec := RECORD +DATASET(rec0) arow; +END; + + + + +import python; +DATASET(rec0) word2vec(dataset(Inverted.Layouts.DocumentIngest) A, unicode word) := embed(Python) + + + import numpy as np + import re + import gensim + + s=[] + for n in A: + s.append(gensim.utils.simple_preprocess(unicode(n.content))) + model = gensim.models.Word2Vec(s,size=150,window=10,min_count=2,workers=10) + model.train(s,total_examples=len(s),epochs=10) + w1 =word.split() + r=[] + for i in w1: + r.append([i,unicode(model.wv.most_similar(positive=(i)))]) + + return (r[0][1]).split(',') + +endembed; + + + + + query:=u'students in school' ; + + + +res:=word2vec(inDocs,query); +Output(res); + + +rec2 := RECORD + DATASET (Inverted.Layouts.DocumentIngest) cell; +END; + Dataset(rec2) filter(dataset(Inverted.Layouts.DocumentIngest) A, DATASET (rec0) B) := embed(Python) + + import numpy as np + import re + import gensim + s=[] + r=[] + m=[] + l=[] + + for i in B: + for n in A: + if (unicode (n.content).find(unicode(i.cell))!=0): + if (n.content not in m): + m.append([n.content]) + l.append([n]) + + + + return l +endembed; + +res2:=filter(inDocs,res); +Output(res2); +OUTPUT(CHOOSEN(res2, 100), ALL, NAMED('First_100_blocks')); + + + + + + + + \ No newline at end of file
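
A minimal, self-contained ECL sketch (not part of the patch) of the initialism handling this change introduces in ParsedText.ecl: dotted forms are matched as single terms, and the keyword count excludes the dots, mirroring the MATCHLENGTH minus STD.Str.FindCount scheme above. The names ds, Initialism, and res are hypothetical.

IMPORT Std;
ds := DATASET([{U'The U.S.A. and the U.K. signed it'}], {UNICODE line});
//same character-class style as expr2/expr3 in ParsedText.ecl
PATTERN Initialism := PATTERN(U'[a-zA-Z]+[.][a-zA-Z]+[.]*[a-zA-Z]*[.]*[a-zA-Z]*');
res := RECORD
  UNICODE term := MATCHUNICODE(Initialism);
  UNSIGNED4 kw := MATCHLENGTH(Initialism)
                  - Std.Str.FindCount((STRING)MATCHTEXT(Initialism), '.');
END;
//expected hits: 'U.S.A.' with kw=3 and 'U.K.' with kw=2
OUTPUT(PARSE(ds, line, Initialism, res, MAX, MANY));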