package delimited_parsing

  1. Overview
  2. Docs

Read CSVs & CSV-like delimited formats (following the CSV quoting behaviour).

These formats are loosely documented by RFC 4180: https://www.ietf.org/rfc/rfc4180.txt

include module type of Delimited_kernel.Read

Read CSVs & CSV-like delimited formats (following the CSV quoting behaviour).

These formats are loosely documented by RFC 4180: https://www.ietf.org/rfc/rfc4180.txt

exception Bad_csv_formatting of string list * string

This provides an applicative interface for constructing values from a csv file.

An 'a t describes how to build an OCaml model 'a for each row.

Simple example:

type t =
  { foo : int
  ; bar : string
  }

(* Describes how to generate a [t] from a row of a csv file *)
let parse : t Delimited_kernel.Parse.t =
  let open Delimited_kernel.Parse.Let_syntax in
  let%map_open foo = at_header "foo" ~f:Int.of_string
  and bar = at_header "bar" ~f:String.of_string in
  { foo; bar }
;;

let _ =
  Delimited_kernel.Parse.list_of_string ~header:`Yes parse
    "foo,bar\n2,\"hello, world\"\n"
;;
include Core_kernel.Applicative.S with type 'a t := 'a t
val return : 'a -> 'a t
val map : 'a t -> f:('a -> 'b) -> 'b t
val both : 'a t -> 'b t -> ('a * 'b) t
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t

Same as apply.

val (<*) : 'a t -> unit t -> 'a t
val (*>) : unit t -> 'a t -> 'a t
val (>>|) : 'a t -> ('a -> 'b) -> 'b t
val apply : ('a -> 'b) t -> 'a t -> 'b t
val map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t
val map3 : 'a t -> 'b t -> 'c t -> f:('a -> 'b -> 'c -> 'd) -> 'd t
val all : 'a t list -> 'a list t
val all_unit : unit t list -> unit t
val all_ignore : unit t list -> unit t
  • deprecated [since 2018-02] Use [all_unit]
module Applicative_infix = Delimited_kernel.Read.Applicative_infix
val at_index : int -> f:(string -> 'a) -> 'a t

Read a field at the given index. Use f to convert the field from string.

val at_header : string -> f:(string -> 'a) -> 'a t

Read a field at the given header. Use f to convert the field from string.

Note that if the given header is not provided through either the file or the ~header argument to the parsers, this will fail at runtime.

val at_header_opt : string -> f:(string option -> 'a) -> 'a t

Read a field at the given header, if it exists. Use f to convert the field from string option (the field is optional because the header may be absent).

module On_invalid_row = Delimited_kernel.Read.On_invalid_row

Header parsing control

Whole-row parsing.

val fold_string : ?strip:bool -> ?sep:char -> ?quote:[ `No_quoting | `Using of char ] -> ?header:Header.t -> ?on_invalid_row:'a On_invalid_row.t -> 'a t -> init:'b -> f:('b -> 'a -> 'b) -> string -> 'b

Fold the CSV rows contained in the given string.

val list_of_string : ?strip:bool -> ?sep:char -> ?quote:[ `No_quoting | `Using of char ] -> ?header:Header.t -> ?on_invalid_row:'a On_invalid_row.t -> 'a t -> string -> 'a list

Load the CSV rows contained in the given string as a list.

Experts only. If you really think you need a function in this module, please talk to a delimited dev first.

Async helpers for delimited parsing

val fold_reader : ?strip:bool -> ?skip_lines:int -> ?sep:char -> ?quote:[ `No_quoting | `Using of char ] -> ?header:Header.t -> ?on_invalid_row:'a On_invalid_row.t -> 'a t -> init:'b -> f:('b -> 'a -> 'b Async.Deferred.t) -> Async.Reader.t -> 'b Async.Deferred.t

fold_reader ?strip ?skip_lines ?sep ?quote ?header ?on_invalid_row t ~init ~f r produces a value by folding over a csv document read from r, using t to build a value from each row. The reader will be closed on EOF.

If strip is true, leading and trailing whitespace is stripped from each field. Default value is false.

If skip_lines > 0, that many lines are skipped at the start of the input. Note that this skips lines without doing any CSV parsing of the lines being skipped, so newlines within a quoted field are treated identically to newlines outside a quoted field. Default value is 0.

sep is the character that separates fields within a row. Default value is ','

quote defines a character to use for quoting. `Using '"' implements the MS Excel convention: either a field is unquoted, or it has leading and trailing quotes and internal escaped characters are represented as quote-char char, e.g., "\n to escape a newline. `No_quoting means all characters are literal. The default is `Using '"'

val fold_reader' : ?strip:bool -> ?skip_lines:int -> ?sep:char -> ?quote:[ `No_quoting | `Using of char ] -> ?header:Header.t -> ?on_invalid_row:'a On_invalid_row.t -> 'a t -> init:'b -> f:('b -> 'a Core.Queue.t -> 'b Async.Deferred.t) -> Async.Reader.t -> 'b Async.Deferred.t

fold_reader' ?strip ?skip_lines ?sep ?quote ~init ~f r works similarly to fold_reader, except for the f argument. fold_reader' runs f on batches of parsed values (each batch is a Core.Queue.t of 'a) rather than running f on each individual value.

val fold_reader_without_pushback : ?strip:bool -> ?skip_lines:int -> ?sep:char -> ?quote:[ `No_quoting | `Using of char ] -> ?header:Header.t -> ?on_invalid_row:'a On_invalid_row.t -> 'a t -> init:'b -> f:('b -> 'a -> 'b) -> Async.Reader.t -> 'b Async.Deferred.t

Same as fold_reader but the fold function does not exert pushback on the fold.

val pipe_of_reader : ?strip:bool -> ?skip_lines:int -> ?sep:char -> ?quote:[ `No_quoting | `Using of char ] -> ?header:Header.t -> ?on_invalid_row:'a On_invalid_row.t -> 'a t -> Async.Reader.t -> 'a Async.Pipe.Reader.t

pipe_of_reader t reader produces a pipe reader of parsed values.

val create_reader : ?strip:bool -> ?skip_lines:int -> ?sep:char -> ?quote:[ `No_quoting | `Using of char ] -> ?header:Header.t -> ?on_invalid_row:'a On_invalid_row.t -> 'a t -> string -> 'a Async.Pipe.Reader.t Async.Deferred.t

create_reader t filename opens a reader for the given filename & returns a pipe of its parsed values.