package bio_io

  1. Overview
  2. Docs

In_channel for Btab files where each query in the file is a single record.

Overview

WARNING: This module assumes that queries are sorted. One case in which this assumption does not hold is with mmseqs when using more than one iteration. E.g., mmseqs easy-search --num-iterations 3. This behavior will likely change in the future.

You should consider this module experimental.

Example

Here is a short example program. It reads a btab file and prints out the records.

open! Base
open! Bio_io.Btab_queries

let parse_argv () =
  match Sys.get_argv () with
  | [| _; file_name |] -> file_name
  | _ -> failwith "missing file_name"

let () =
  let file_name = parse_argv () in
  In_channel.with_file_iter_records file_name ~f:(fun r ->
      Stdio.print_endline "===";
      Stdio.print_endline @@ Record.query r;
      let hits = List.map ~f:Bio_io.Btab.Record.parse @@ Record.hits r in
      Stdio.print_s @@ [%sexp_of: Bio_io.Btab.Record.Parsed.t list] hits)

The output will be something like this:

===
Q 1
(((query "Q 1") (target q1t1) (pident 0.1) (alnlen 2) (mismatch 3)
  (gapopen 4) (qstart 5) (qend 6) (tstart 7) (tend 8) (evalue 9.99E-05)
  (bits 10) (qlen ()) (tlen ()))
 ((query "Q 1") (target q1t2) (pident 0.11) (alnlen 12) (mismatch 13)
  (gapopen 14) (qstart 15) (qend 16) (tstart 17) (tend 18) (evalue 1.9E-05)
  (bits 20) (qlen ()) (tlen ())))
===
Q_2
(((query Q_2) (target q2t1) (pident 0.21) (alnlen 22) (mismatch 23)
  (gapopen 24) (qstart 25) (qend 26) (tstart 27) (tend 28) (evalue 2.9E-05)
  (bits 30) (qlen ()) (tlen ())))
module T : sig ... end
include module type of struct include T end
include module type of struct include Private.Peekable_in_channel end

A wrapper of Jane Street's Stdio.In_channel. Adds peek_char and peek_line functions that work on FIFOs as well as regular files.

Used internally for bio input channels so that you can pipe directly from gzip even in channels that need peeking.

Differences from Stdio.In_channel

  • No binary mode

The following functions are not implemented:

  • input
  • really_input
  • really_input_exn
  • input_char
  • input_byte
  • input_binary_int
  • unsafe_input_value
  • input_buffer
  • seek
  • pos
  • length
  • set_binary_mode
include Ppx_compare_lib.Equal.S with type t := t
val input_all : t -> Base.string
val input_line : ?fix_win_eol:Base.bool -> t -> Base.string Base.option
val input_line_exn : ?fix_win_eol:Base.bool -> t -> Base.string
val fold_lines : ?fix_win_eol:Base.bool -> t -> init:'a -> f:('a -> Base.string -> 'a) -> 'a
val input_lines : ?fix_win_eol:Base.bool -> t -> Base.string Base.list
val iter_lines : ?fix_win_eol:Base.bool -> t -> f:(Base.string -> Base.unit) -> Base.unit
val read_lines : ?fix_win_eol:Base.bool -> Base.string -> Base.string Base.list
val read_all : Base.string -> Base.string

Both peek functions are safe to call in the context of one of the iterator functions.

val peek_char : ?fix_win_eol:Base.bool -> t -> Base.char Base.option
val peek_line : ?fix_win_eol:Base.bool -> t -> Base.string Base.option
type record = Record.t
include sig ... end
val stdin : T.t
val create : Base.string -> T.t
val close : T.t -> Base.unit
val with_file : Base.string -> f:(T.t -> 'a) -> 'a
val equal : T.t -> T.t -> Base.bool
val input_record : T.t -> T.record Base.option
val fold_records : T.t -> init:'a -> f:('a -> T.record -> 'a) -> 'a
val foldi_records : T.t -> init:'a -> f:(Base.int -> 'a -> T.record -> 'a) -> 'a
val with_file_fold_records : Base.string -> init:'a -> f:('a -> T.record -> 'a) -> 'a
val with_file_foldi_records : Base.string -> init:'a -> f:(Base.int -> 'a -> T.record -> 'a) -> 'a
val iter_records : T.t -> f:(T.record -> Base.unit) -> Base.unit
val iteri_records : T.t -> f:(Base.int -> T.record -> Base.unit) -> Base.unit
val with_file_iter_records : Base.string -> f:(T.record -> Base.unit) -> Base.unit
val with_file_iteri_records : Base.string -> f:(Base.int -> T.record -> Base.unit) -> Base.unit
val records : T.t -> T.record Base.list
val with_file_records : Base.string -> T.record Base.list
val record_sequence : T.t -> T.record Base.Sequence.t