SeqAn3 3.3.0-rc.1
The Modern C++ library for sequence analysis.
io/sam_file/detail/cigar.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <algorithm>
16#include <concepts>
17#include <ranges>
18#include <seqan3/std/charconv>
19#include <sstream>
20
29
30namespace seqan3::detail
31{
34struct view_equality_fn
35{
37 template <std::ranges::forward_range rng1_type, std::ranges::forward_range rng2_type>
38 constexpr bool operator()(rng1_type && rng1, rng2_type && rng2) const
39 {
40 return std::ranges::equal(rng1, rng2);
41 }
42};
43
51inline void update_alignment_lengths(int32_t & ref_length,
52 int32_t & seq_length,
53 char const cigar_operation,
54 uint32_t const cigar_count)
55{
56 switch (cigar_operation)
57 {
58 case 'M':
59 case '=':
60 case 'X':
61 ref_length += cigar_count, seq_length += cigar_count;
62 break;
63 case 'D':
64 case 'N':
65 ref_length += cigar_count;
66 break;
67 case 'I':
68 seq_length += cigar_count;
69 break;
70 case 'S':
71 case 'H':
72 case 'P':
73 break; // no op (soft-clipping or padding does not increase either length)
74 default:
75 throw format_error{"Illegal cigar operation: " + std::string{cigar_operation}};
76 }
77}
78
93template <typename cigar_input_type>
94SEQAN3_WORKAROUND_LITERAL std::tuple<std::vector<cigar>, int32_t, int32_t> parse_cigar(cigar_input_type && cigar_input)
95{
96 std::vector<cigar> operations{};
97 std::array<char, 20> buffer{}; // buffer to parse numbers with from_chars. Biggest number should fit in uint64_t
98 char cigar_operation{};
99 uint32_t cigar_count{};
100 int32_t ref_length{}, seq_length{}; // length of aligned part for ref and query
101
102 // transform input into a single input view if it isn't already
103 auto cigar_view = cigar_input | views::single_pass_input;
104
105 // parse the rest of the cigar
106 // -------------------------------------------------------------------------------------------------------------
107 while (std::ranges::begin(cigar_view) != std::ranges::end(cigar_view)) // until stream is not empty
108 {
109 auto buff_end = (std::ranges::copy(cigar_view | detail::take_until_or_throw(!is_digit), buffer.data())).out;
110 cigar_operation = *std::ranges::begin(cigar_view);
111 ++std::ranges::begin(cigar_view);
112
113 if (std::from_chars(buffer.begin(), buff_end, cigar_count).ec != std::errc{})
114 throw format_error{"Corrupted cigar string encountered"};
115
116 update_alignment_lengths(ref_length, seq_length, cigar_operation, cigar_count);
117 operations.emplace_back(cigar_count, cigar::operation{}.assign_char(cigar_operation));
118 }
119
120 return {operations, ref_length, seq_length};
121}
122
128[[nodiscard]] inline std::string get_cigar_string(std::vector<cigar> const & cigar_vector)
129{
130 std::string result{};
131 std::ranges::for_each(cigar_vector,
132 [&result](auto & cig)
133 {
134 result.append(static_cast<std::string_view>(cig.to_string()));
135 });
136 return result;
137}
138
172template <seqan3::aligned_sequence ref_seq_type, seqan3::aligned_sequence query_seq_type>
173[[nodiscard]] inline std::string get_cigar_string(ref_seq_type && ref_seq,
174 query_seq_type && query_seq,
175 uint32_t const query_start_pos = 0,
176 uint32_t const query_end_pos = 0,
177 bool const extended_cigar = false)
178{
179 return get_cigar_string(std::tie(ref_seq, query_seq), query_start_pos, query_end_pos, extended_cigar);
180}
181
184struct access_restrictor_fn
185{
187 template <typename chr_t>
188 [[noreturn]] chr_t operator()(chr_t) const
189 {
190 throw std::logic_error{"Access is not allowed because there is no sequence information."};
191 }
192};
193
194} // namespace seqan3::detail
Provides the seqan3::cigar alphabet.
T begin(T... args)
The <charconv> header from C++17's standard library.
constexpr derived_type & assign_char(char_type const chr) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: alphabet_base.hpp:163
exposition_only::cigar_operation operation
The (extended) cigar operation alphabet of M,D,I,H,N,P,S,X,=.
Definition: alphabet/cigar/cigar.hpp:96
T copy(T... args)
T equal(T... args)
T for_each(T... args)
T from_chars(T... args)
constexpr auto is_digit
Checks whether c is a digital character.
Definition: predicate.hpp:262
constexpr auto single_pass_input
A view adapter that decays most of the range properties and adds single pass behavior.
Definition: single_pass_input.hpp:348
Provides seqan3::detail::pairwise_alignment and seqan3::detail::writable_pairwise_alignment.
#define SEQAN3_WORKAROUND_LITERAL
Our char literals returning std::vector should be constexpr if constexpr std::vector is supported.
Definition: platform.hpp:282
Provides character predicates for tokenisation.
Provides seqan3::views::single_pass_input.
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
T tie(T... args)
Auxiliary for pretty printing of exception messages.
Provides seqan3::tuple_like.
Provides seqan3::views::zip.