From: Sylvain Le Gall <gildor@debian.org>
Date: Sun, 1 Sep 2019 21:33:40 +0200
Subject: Allow the user to add a pattern to skip the beginning of a file
 (e.g. for XML files)

---
 Depend           | 22 ++++++++--------
 Makefile.in      |  6 ++---
 config_parse.mly | 79 ++++++++++++++++++++++++++++++++++++++++++--------------
 main.ml          | 23 ++++++++++++-----
 model.ml         |  4 +--
 skip.ml          | 36 ++++++++++++++++++++++++++
 6 files changed, 128 insertions(+), 42 deletions(-)
 create mode 100644 skip.ml

diff --git a/Depend b/Depend
index 8931533..558d09f 100644
--- a/Depend
+++ b/Depend
@@ -1,11 +1,11 @@
-config_lex.cmo: config.cmo config_parse.cmi 
-config_lex.cmx: config.cmx config_parse.cmx 
-config_parse.cmo: config.cmo model.cmo config_parse.cmi 
-config_parse.cmx: config.cmx model.cmx config_parse.cmi 
-main.cmo: config.cmo config_builtin.cmo config_lex.cmo config_parse.cmi \
-    info.cmo model.cmo 
-main.cmx: config.cmx config_builtin.cmx config_lex.cmx config_parse.cmx \
-    info.cmx model.cmx 
-mkconfig.cmo: config.cmo config_lex.cmo config_parse.cmi 
-mkconfig.cmx: config.cmx config_lex.cmx config_parse.cmx 
-config_parse.cmi: model.cmo 
+config_lex.cmo: config_parse.cmi config.cmo 
+config_lex.cmx: config_parse.cmx config.cmx 
+config_parse.cmo: skip.cmo model.cmo config.cmo config_parse.cmi 
+config_parse.cmx: skip.cmx model.cmx config.cmx config_parse.cmi 
+main.cmo: model.cmo info.cmo config_parse.cmi config_lex.cmo \
+    config_builtin.cmo config.cmo 
+main.cmx: model.cmx info.cmx config_parse.cmx config_lex.cmx \
+    config_builtin.cmx config.cmx 
+mkconfig.cmo: config_parse.cmi config_lex.cmo config.cmo 
+mkconfig.cmx: config_parse.cmx config_lex.cmx config.cmx 
+config_parse.cmi: skip.cmo model.cmo 
diff --git a/Makefile.in b/Makefile.in
index 39bbcaf..9ffce52 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -15,9 +15,9 @@ INSTALLDIR=@INSTALLDIR@
 
 GENERATED= config_parse.mli config_parse.ml config_lex.ml config_builtin.ml
 
-MKCONFIG.CU=model config config_parse config_lex mkconfig
+MKCONFIG.CU=skip model config config_parse config_lex mkconfig
 MKCONFIG.CMO=$(MKCONFIG.CU:%=%.cmo)
-CU=info model config config_parse config_lex config_builtin main
+CU=info skip model config config_parse config_lex config_builtin main
 CMO=$(CU:%=%.cmo)
 
 
@@ -81,4 +81,4 @@ config_builtin.ml: config_builtin mkconfig
 		$(OCAMLYACC) -v $<
 
 
-include Depend
+-include Depend
diff --git a/config_parse.mly b/config_parse.mly
index 40c9213..6d6752c 100644
--- a/config_parse.mly
+++ b/config_parse.mly
@@ -49,6 +49,29 @@
 
 %{
 open Printf
+
+type entry = 
+  | EntryModel of Model.generator 
+  | EntrySkip of Skip.regexp_skip list
+;;
+
+(* Dispatch entry considering if it is a skip or a model.
+ * List returned are reversed considering their initial order.
+ *)
+let rec dispatch_entry acc_model acc_skip lst =
+  match lst with
+  | (rg_filename, EntryModel mdl) :: tl ->
+      dispatch_entry ((rg_filename, mdl) :: acc_model) acc_skip tl
+  | (rg_filename, EntrySkip rg_skip_lst) :: tl ->
+      let nacc_skip =
+        List.fold_left 
+        (fun nacc_skip rg_skip -> (rg_filename, rg_skip) :: nacc_skip) 
+        acc_skip
+        rg_skip_lst
+      in
+      dispatch_entry acc_model nacc_skip tl
+  | [] ->
+      acc_model, acc_skip
 %}
 
 %token ARROW
@@ -58,14 +81,14 @@ open Printf
 %token <string> STRING
 
 %start configfile
-%type <(Str.regexp * Model.generator) list > configfile
+%type <((Str.regexp * Model.generator) list) * ((Str.regexp * Skip.regexp_skip) list)> configfile
 %start boot
 %type <(string * string * (string * string) list) list> boot
 
 %%
 
 configfile:
-  opt_pipe item_list EOF                            { List.rev $2 }
+  opt_pipe item_list EOF                            { dispatch_entry [] [] $2 }
 ;
 
 opt_pipe:
@@ -89,23 +112,41 @@ item:
 	  raise (Config.Error (sprintf "Illegal regexp: %s" msg,
 			       Parsing.rhs_start 1, Parsing.rhs_end 1))
     in
-    let model =
-      try
-	Model.find $3
-      with
-	Not_found ->
-	  raise (Config.Error (sprintf "Unknown model: %s" $3,
-			       Parsing.rhs_start 3, Parsing.rhs_end 3))
-    in
-    let generator =
-      try
-	model (List.rev $4)
-      with
-	Model.Error msg ->
-	  raise (Config.Error (msg,
-			       Parsing.rhs_start 3, Parsing.rhs_end 4))
-    in
-    regexp, generator
+    if $3 = "skip" then
+      let fun_parameters (id, str) =
+        if id = "match" then
+          try 
+            Str.regexp ("^" ^ str ^ "$")
+          with
+            Failure msg ->
+              raise (Config.Error (sprintf "Illegal regexp: %s" msg,
+                                   Parsing.rhs_start 1, Parsing.rhs_end 1))
+        else
+          raise (Config.Error (sprintf "Unkown option '%s' for skip" id,
+                                 Parsing.rhs_start 3, Parsing.rhs_end 3))
+      in
+      let skip_lst =
+        List.map fun_parameters (List.rev $4)
+      in
+      regexp, (EntrySkip skip_lst)
+    else
+      let model =
+        try
+          Model.find $3
+        with
+          Not_found ->
+            raise (Config.Error (sprintf "Unknown model: %s" $3,
+                                 Parsing.rhs_start 3, Parsing.rhs_end 3))
+      in
+      let generator =
+        try
+          model (List.rev $4)
+        with
+          Model.Error msg ->
+            raise (Config.Error (msg,
+                                 Parsing.rhs_start 3, Parsing.rhs_end 4))
+      in
+      regexp, (EntryModel generator)
   }
 ;
 
diff --git a/main.ml b/main.ml
index 972a349..4f80c8f 100644
--- a/main.ml
+++ b/main.ml
@@ -25,13 +25,17 @@ open Config_builtin
 
 let generators : (Str.regexp * Model.generator) list ref = ref []
 
+let skips : (Str.regexp * Skip.regexp_skip) list ref = ref []
+
 let read_configfile filename =
   let ic = open_in filename in
   let lexbuf = Lexing.from_channel ic in
   try
-    generators := 
+    let (config_generators, config_skips) =
       (Config_parse.configfile Config_lex.token lexbuf)
-      @ !generators;
+    in
+    skips := config_skips @ !skips;
+    generators := config_generators @ !generators;
     close_in ic
   with
     Config.Error (msg, loc1, loc2) ->
@@ -44,8 +48,6 @@ let read_configfile filename =
 	Sys.argv.(0)
 	filename (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
 
-
-
 let find_generator filename =
   let basename = Filename.basename filename in
   try
@@ -61,6 +63,11 @@ let find_generator filename =
 	Sys.argv.(0) filename;
       exit 2
 
+let find_skips filename = 
+  List.filter
+    (fun (rg_filename, _) -> Str.string_match rg_filename filename 0)
+    !skips
+
 
 
 (***************************************************************************)
@@ -144,12 +151,13 @@ let copy ic oc =
   in
   loop ()
 
-
-
 let create_header header filename =
   let generator = find_generator filename in
+  let skip_lst = find_skips filename in
   pipe_file (fun ic oc ->
+    let () = Skip.skip skip_lst ic oc in
     let line = generator.Model.remove ic in
+    let () = Skip.skip skip_lst ic oc in
     generator.Model.create oc header;
     output_string oc line;
     copy ic oc
@@ -159,8 +167,11 @@ let create_header header filename =
 
 let remove_header filename =
   let generator = find_generator filename in
+  let skip_lst = find_skips filename in
   pipe_file (fun ic oc ->
+    let () = Skip.skip skip_lst ic oc in
     let line = generator.Model.remove ic in
+    let () = Skip.skip skip_lst ic oc in
     output_string oc line;
     copy ic oc
   ) filename
diff --git a/model.ml b/model.ml
index b9163e9..6c42c41 100644
--- a/model.ml
+++ b/model.ml
@@ -26,11 +26,9 @@ exception Error of string
 
 type generator =
     { remove: in_channel -> string;
-      create: out_channel -> string list -> unit
+      create: out_channel -> string list -> unit;
     } 
 
-
-
 (***************************************************************************)
 (** {2 Models} *)
 
diff --git a/skip.ml b/skip.ml
new file mode 100644
index 0000000..dad4c55
--- /dev/null
+++ b/skip.ml
@@ -0,0 +1,36 @@
+
+
+type regexp_filename = Str.regexp
+;;
+
+type regexp_skip = Str.regexp
+;;
+
+let skip skip_lst ic oc =
+  let skip_aux () =
+    let initial_pos =
+      LargeFile.pos_in ic
+    in
+    try
+      let line = 
+        input_line ic
+      in
+      try
+        let _ =
+          List.find 
+            (fun (_, rg_skip) -> Str.string_match rg_skip line 0)
+            skip_lst
+        in
+          prerr_endline 
+            ("Line : "^line^" skipped");
+          output_string oc line;
+          output_string oc "\n"
+      with Not_found ->
+        LargeFile.seek_in ic initial_pos
+    with End_of_file ->
+      ()
+  in
+    skip_aux ()
+;; 
+
+
