Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update GFA reader to support GFA v1.2 paths #1405

Merged
merged 1 commit into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/common/io/graph/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ project(graphio CXX)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

add_library(graphio STATIC
gfa.cpp gfa_reader.cpp gfa_writer.cpp
gfa.cpp cigar.cpp gfa_reader.cpp gfa_writer.cpp
fastg_writer.cpp)
target_link_libraries(graphio foonathan::lexy zlibstatic)
33 changes: 33 additions & 0 deletions src/common/io/graph/cigar.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//***************************************************************************
//* Copyright (c) 2023-2024 SPAdes team
//* All Rights Reserved
//* See file LICENSE for details.
//***************************************************************************

#include <lexy/action/parse.hpp> // lexy::parse
#include <lexy/input/string_input.hpp>
#include <lexy_ext/report_error.hpp>

#include "cigar.hpp"

#include "cigar.inl"

namespace cigar {
// Streams a tag as its two-character name, a ':' and then the stored value.
// NOTE(review): unlike tag::print(), the type character is not written here —
// confirm that this shorter form is intended.
std::ostream &operator<<(std::ostream &s, const tag &t) {
    s << t.name[0];
    s << t.name[1];
    s << ':';

    // Forward whichever alternative the variant currently holds to the stream.
    const auto emit = [&s](const auto &payload) -> std::ostream & { return s << payload; };
    return std::visit(emit, t.val);
}

std::optional<tag> parseTag(const char* line, size_t len) {
lexy::visualization_options opts;
opts.max_lexeme_width = 35;

auto result = lexy::parse<grammar::tag>(lexy::string_input(line, len), lexy_ext::report_error.opts(opts));
if (result.has_value())
return std::make_optional(result.value());

return {};
}


}
97 changes: 97 additions & 0 deletions src/common/io/graph/cigar.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
//***************************************************************************
//* Copyright (c) 2023-2024 SPAdes team
//* All Rights Reserved
//* See file LICENSE for details.
//***************************************************************************

#pragma once

#include <string>
#include <variant>
#include <string_view>
#include <optional>
#include <vector>
#include <algorithm>
#include <ostream>

#include <cstdint>
#include <cstdlib>
#include <cinttypes>
#include <cstdio>

namespace cigar {
// A single optional field: two-character name, one-character type code and a
// value that is an integer, a string or a float.
struct tag {
    char name[2];
    char type;
    std::variant<int64_t, std::string, float> val;

    // Uses the first two characters of `n` as the name and the first character
    // of `t` as the type code; `v` is moved into the variant.
    // No length checks are done: `n` needs at least two characters, `t` at least one.
    template<typename T>
    tag(std::string_view n, std::string_view t, T v)
        : name{n[0], n[1]},
          type(t.front()),
          val(std::move(v)) {}

    friend std::ostream &operator<<(std::ostream &s, const tag &t);

    // Writes the tag to stdout as NAME:TYPE:VALUE (no trailing newline).
    void print() const {
        std::fprintf(stdout, "%c%c", name[0], name[1]);
        std::fputs(":", stdout);
        // Each overload of _print emits "TYPE:" followed by the value.
        std::visit([this](const auto &payload) { this->_print(payload); }, val);
    }

private:
    // Float payload, printed with %g.
    void _print(float number) const {
        std::fprintf(stdout, "%c:%g", type, number);
    }

    // String payload, printed verbatim.
    void _print(const std::string &text) const {
        std::fprintf(stdout, "%c:%s", type, text.c_str());
    }

    // Integer payload, printed as a signed 64-bit decimal.
    void _print(int64_t number) const {
        std::fprintf(stdout, "%c:%" PRId64, type, number);
    }
};

// One CIGAR operation: a 24-bit repeat count and the operation character,
// declared as adjacent bit-fields.
struct cigarop {
    uint32_t count : 24;
    char op : 8;

    // Writes the operation to stdout as <count><op>, e.g. "10M" (no newline).
    void print() const {
        const unsigned length = count;
        const char opcode = op;
        std::fprintf(stdout, "%u%c", length, opcode);
    }
};

using cigar_string = std::vector<cigarop>;

static inline std::optional<tag>
getTag(const char *name,
const std::vector<tag> &tags) {
auto res = std::find_if(tags.begin(), tags.end(),
[=](const tag &tag) {
return (tag.name[0] == name[0] &&
tag.name[1] == name[1]);
});
if (res == tags.end())
return {};

return *res;
}

template<class T>
std::optional<T> getTag(const char *name,
const std::vector<tag> &tags) {
auto res = std::find_if(tags.begin(), tags.end(),
[=](const tag &tag) {
return (tag.name[0] == name[0] &&
tag.name[1] == name[1]);
});
if (res == tags.end())
return {};

if (!std::holds_alternative<T>(res->val))
return {};

return std::get<T>(res->val);
}

// Parses the `len` characters starting at `line` as a single tag.
// Returns the parsed tag, or an empty optional when parsing fails
// (the definition lives in cigar.cpp).
std::optional<tag> parseTag(const char* line, size_t len);
}
123 changes: 123 additions & 0 deletions src/common/io/graph/cigar.inl
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
//***************************************************************************
//* Copyright (c) 2023-2024 SPAdes team
//* All Rights Reserved
//* See file LICENSE for details.
//***************************************************************************

#include <lexy/dsl.hpp> // lexy::dsl::*
#include <lexy/callback.hpp> // lexy callbacks
#include <lexy/grammar.hpp>

#include <string>

namespace cigar::grammar {
namespace dsl = lexy::dsl;

// Grammar for one optional field written as NAME:TYPE:VALUE.
// The type letter selects the sub-production that parses VALUE; the result is
// a cigar::tag built from the name, the type lexeme and the parsed value.
struct tag {
    // VALUE of an 'A' field: one ASCII letter or digit, produced as std::string.
    struct tag_character : lexy::token_production {
        static constexpr auto rule = dsl::capture(dsl::ascii::alpha_digit);
        static constexpr auto value = lexy::as_string<std::string>;
    };

    // VALUE of an 'i' field: optional minus sign, then decimal digits with no
    // leading zero, produced as a 64-bit signed integer.
    struct tag_integer : lexy::token_production {
        static constexpr auto rule =
            dsl::minus_sign + dsl::integer<std::int64_t>(dsl::digits<>.no_leading_zero());
        static constexpr auto value = lexy::as_integer<std::int64_t>;
    };

    // VALUE of 'Z', 'J', 'H' and 'B' fields: taken as a run of printable ASCII
    // characters and produced as std::string without further interpretation.
    struct tag_string : lexy::token_production {
        static constexpr auto rule = dsl::identifier(dsl::ascii::print);
        static constexpr auto value = lexy::as_string<std::string>;
    };

    // VALUE of an 'f' field: [-]digits[.digits][(e|E)(+|-)digits].
    // Note that the exponent, when present, must carry an explicit sign.
    struct tag_float : lexy::token_production {
        static constexpr auto rule = [] {
            auto integer = dsl::if_(dsl::lit_c<'-'>) + dsl::digits<>.no_leading_zero();
            auto fraction = dsl::lit_c<'.'> >> dsl::digits<>;
            auto exp_char = dsl::lit_c<'e'> | dsl::lit_c<'E'>;
            auto exponent = exp_char >> (dsl::lit_c<'+'> | dsl::lit_c<'-'>) + dsl::digits<>;
            // The two positions bracket the matched text for the value callback.
            return dsl::peek(dsl::lit_c<'-'> / dsl::digit<>) >>
                   dsl::position +
                   integer +
                   dsl::if_(fraction) +
                   dsl::if_(exponent) +
                   dsl::position;
        }();

        static constexpr auto value = lexy::callback<float>(
            // std::from_chars(const char*, const char*, float) is only
            // available starting from libc++ from LLVM 14 :(
            //
            // atof() needs a NUL-terminated string and keeps scanning until it
            // meets a non-numeric character. The input is a (pointer, length)
            // range that need not be terminated, so convert a bounded copy of
            // exactly the matched text rather than the raw pointer.
            [](const char *first, const char *last) {
                const std::string text(first, last);
                return ::atof(text.c_str());
            }
        );
    };

    // Two-character field name: a letter followed by a letter or digit.
    struct tag_name : lexy::token_production {
        static constexpr auto name = "tag name";

        static constexpr auto rule = dsl::capture(dsl::token(dsl::ascii::alpha + dsl::ascii::alpha_digit));
        static constexpr auto value = lexy::as_string<std::string_view>;
    };

    // Error tag raised when the type letter is none of A, i, f, Z, J, H, B.
    struct invalid_tag_type {
        static constexpr auto name = "invalid tag type";
    };

    static constexpr auto rule = [] {
        auto colon = dsl::lit_c<':'>;
        // The captured type letter becomes the second callback argument; the
        // production chosen by it supplies the third.
        return dsl::p<tag_name> >> colon +
               (
                   dsl::capture(LEXY_LIT("A")) >> colon + dsl::p<tag_character> |
                   dsl::capture(LEXY_LIT("i")) >> colon + dsl::p<tag_integer> |
                   dsl::capture(LEXY_LIT("f")) >> colon + dsl::p<tag_float> |
                   dsl::capture(LEXY_LIT("Z")) >> colon + dsl::p<tag_string> |
                   dsl::capture(LEXY_LIT("J")) >> colon + dsl::p<tag_string> |
                   dsl::capture(LEXY_LIT("H")) >> colon + dsl::p<tag_string> |
                   dsl::capture(LEXY_LIT("B")) >> colon + dsl::p<tag_string> |
                   dsl::error<invalid_tag_type>
               );
    }();

    static constexpr auto value = lexy::callback<cigar::tag>(
        [](std::string_view name, auto type, auto val) {
            return cigar::tag{name, std::string_view{type.data(), type.size()}, val};
        });
};

// Grammar for a CIGAR string: a non-empty sequence of CIGAR operations.
struct cigar_string {
    static constexpr auto name = "CIGAR string";

    // Every accepted operation character, 'J' included, belongs to the one
    // named character class. 'J' previously sat outside the LEXY_CHAR_CLASS
    // parentheses, which left the combined class without the "CIGAR opcode" name.
    static constexpr auto cigaropcode =
        LEXY_CHAR_CLASS("CIGAR opcode",
                        LEXY_LIT("M") / LEXY_LIT("I") / LEXY_LIT("D") /
                        LEXY_LIT("N") / LEXY_LIT("S") / LEXY_LIT("H") /
                        LEXY_LIT("P") / LEXY_LIT("X") / LEXY_LIT("=") /
                        LEXY_LIT("J"));

    // One operation: either a single period, which yields a zero count and a
    // zero opcode, or an unsigned count followed by an opcode character.
    // NOTE(review): cigar::cigarop::count is a 24-bit field, so a parsed count
    // of 2^24 or more would not fit — confirm whether that needs a check.
    struct cigarop : lexy::transparent_production {
        static constexpr auto name = "CIGAR operation";

        static constexpr auto rule =
            dsl::period |
            dsl::integer<std::uint32_t> >> dsl::capture(cigaropcode);
        static constexpr auto value = lexy::callback<cigar::cigarop>(
            []() { return cigar::cigarop{0, 0}; },
            [](std::uint32_t cnt, auto lexeme) {
                return cigar::cigarop{cnt, lexeme[0]};
            });
    };

    static constexpr auto rule = dsl::list(dsl::p<cigarop>);
    static constexpr auto value = lexy::as_list<std::vector<cigar::cigarop>>;
};

// Literal TAB character; it introduces and separates the tag fields.
static constexpr auto tab = dsl::lit_c<'\t'>;

// Optional tag list at the end of the input: either the input ends at once,
// or a TAB starts a TAB-separated list of tags (a trailing TAB is accepted)
// that must run up to the end of the input. The tags are collected into a vector.
struct opt_tags {
    static constexpr auto name = "tags";

    static constexpr auto rule = [] {
        auto end_of_input = dsl::eof;
        auto tag_list = dsl::list(dsl::p<tag>, dsl::trailing_sep(tab));
        auto tagged_tail = tab >> tag_list + end_of_input;
        return end_of_input | tagged_tail;
    }();
    static constexpr auto value = lexy::as_list<std::vector<cigar::tag>>;
};
}
Loading