-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutils.c
More file actions
270 lines (236 loc) · 8.54 KB
/
utils.c
File metadata and controls
270 lines (236 loc) · 8.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
#include "utils.h"
int binary_search(uint64_t *arr, uint64_t size, uint64_t key) {
int low = 0, high = size - 1;
while (low <= high) {
int mid = low + (high - low) / 2;
if (arr[mid] == key) {
return mid;
}
if (arr[mid] < key) {
low = mid + 1;
} else {
high = mid - 1;
}
}
return -1;
}
void quicksort(uint64_t *array, int low, int high) {
if (low < high) {
uint64_t pivot = array[high];
int i = low - 1;
for (int j = low; j < high; j++) {
if (array[j] < pivot) {
i++;
uint64_t temp = array[i];
array[i] = array[j];
array[j] = temp;
}
}
uint64_t temp = array[i+1];
array[i+1] = array[high];
array[high] = temp;
quicksort(array, low, i);
quicksort(array, i + 2, high);
}
}
void open_file_r(FILE **file, const char *filename) {
*file = fopen(filename, "r");
if (*file == NULL) {
fprintf(stderr, "[ERROR] Couldn't open file %s\n", filename);
exit(EXIT_FAILURE);
}
}
void open_file_w(FILE **file, const char *filename) {
*file = fopen(filename, "w");
if (*file == NULL) {
fprintf(stderr, "[ERROR] Couldn't open file %s\n", filename);
exit(EXIT_FAILURE);
}
}
void open_files(struct opt_arg *args, FILE **out_segment, FILE **out_link) {
*out_segment = NULL;
*out_link = NULL;
// create segment file name and open it
char segment_filename[strlen(args->gfa_path)+5];
snprintf(segment_filename, sizeof(segment_filename), "%s.s.0", args->gfa_path);
open_file_w(out_segment, segment_filename);
// create link file name and open it
char link_filename[strlen(args->gfa_path)+5];
snprintf(link_filename, sizeof(link_filename), "%s.l.0", args->gfa_path);
open_file_w(out_link, link_filename);
// print header
fprintf(*out_segment, "H\tVN:Z:1.1\n");
FILE *out_log;
if (args->prefix == NULL) {
char out_err_filename[10];
snprintf(out_err_filename, sizeof(out_err_filename), "lcpan.log");
open_file_w(&out_log, out_err_filename);
} else {
char out_err_filename[strlen(args->prefix)+5];
snprintf(out_err_filename, sizeof(out_err_filename), "%s.log", args->prefix);
open_file_w(&out_log, out_err_filename);
}
fprintf(out_log, "vg\n");
fprintf(out_log, "%s\n", args->gfa_path);
fprintf(out_log, "%d\n", args->thread_number);
fclose(out_log);
}
void print_seq3(uint64_t id, const char *seq1, int seq1_len,
const char *seq2, int seq2_len,
const char *seq3, int seq3_len,
const char *seq_name, int start, int rank, int is_rgfa, FILE *out) {
fprintf(out, "S\t%lu\t", id);
fwrite(seq1, 1, seq1_len, out);
fwrite(seq2, 1, seq2_len, out);
fwrite(seq3, 1, seq3_len, out);
if (is_rgfa) {
fprintf(out, "\tSN:Z:%s\tSO:i:%d\tSR:i:%d\n", seq_name, start, rank);
} else {
fprintf(out, "\n");
}
}
void print_seq2(uint64_t id, const char *seq1, int seq1_len,
const char *seq2, int seq2_len,
const char *seq_name, int start, int rank, int is_rgfa, FILE *out) {
fprintf(out, "S\t%lu\t", id);
fwrite(seq1, 1, seq1_len, out);
fwrite(seq2, 1, seq2_len, out);
if (is_rgfa) {
fprintf(out, "\tSN:Z:%s\tSO:i:%d\tSR:i:%d\n", seq_name, start, rank);
} else {
fprintf(out, "\n");
}
}
void print_seq(uint64_t id, const char *seq, int seq_len, const char *seq_name, int start, int rank, int is_rgfa, FILE *out) {
fprintf(out, "S\t%lu\t", id);
fwrite(seq, 1, seq_len, out);
if (is_rgfa) {
fprintf(out, "\tSN:Z:%s\tSO:i:%d\tSR:i:%d\n", seq_name, start, rank);
} else {
fprintf(out, "\n");
}
}
void print_seq3_vg(uint64_t id, const char *seq1, int seq1_len,
const char *seq2, int seq2_len,
const char *seq3, int seq3_len,
const char *seq_name, int order, int start, int rank, int is_rgfa, FILE *out) {
fprintf(out, "S\t%lu\t", id);
fwrite(seq1, 1, seq1_len, out);
fwrite(seq2, 1, seq2_len, out);
fwrite(seq3, 1, seq3_len, out);
if (is_rgfa) {
fprintf(out, "\tSN:Z:%s.%d\tSO:i:%d\tSR:i:%d\n", seq_name, order, start, rank);
} else {
fprintf(out, "\n");
}
}
void print_seq2_vg(uint64_t id, const char *seq1, int seq1_len,
const char *seq2, int seq2_len,
const char *seq_name, int order, int start, int rank, int is_rgfa, FILE *out) {
fprintf(out, "S\t%lu\t", id);
fwrite(seq1, 1, seq1_len, out);
fwrite(seq2, 1, seq2_len, out);
if (is_rgfa) {
fprintf(out, "\tSN:Z:%s.%d\tSO:i:%d\tSR:i:%d\n", seq_name, order, start, rank);
} else {
fprintf(out, "\n");
}
}
void print_seq_vg(uint64_t id, const char *seq, int seq_len, const char *seq_name, int order, int start, int rank, int is_rgfa, FILE *out) {
fprintf(out, "S\t%lu\t", id);
fwrite(seq, 1, seq_len, out);
if (is_rgfa) {
fprintf(out, "\tSN:Z:%s.%d\tSO:i:%d\tSR:i:%d\n", seq_name, order, start, rank);
} else {
fprintf(out, "\n");
}
}
void print_link(uint64_t id1, char sign1, uint64_t id2, char sign2, uint64_t overlap, FILE *out) {
fprintf(out, "L\t%lu\t%c\t%lu\t%c\t%ldM\n", id1, sign1, id2, sign2, overlap);
}
void find_boundaries(uint64_t start_loc, uint64_t end_loc, const struct chr *chrom, uint64_t start_index, uint64_t *latest_core_index, uint64_t *first_core_after) {
const struct simple_core *cores = chrom->cores;
uint64_t cores_size = (uint64_t)chrom->cores_size;
uint64_t core_idx_1 = cores[start_index].end <= start_loc ? start_index : 0;
int left, mid, right;
left = core_idx_1;
right = core_idx_1+5 < cores_size && end_loc < cores[core_idx_1+5].start ? core_idx_1+5 : cores_size;
while (left < right) {
mid = left + (right - left) / 2;
if (cores[mid].end <= start_loc) {
core_idx_1 = mid;
left = mid + 1;
} else {
right = mid;
}
}
while (core_idx_1 && start_loc < cores[core_idx_1].end ) {
core_idx_1--;
}
*latest_core_index = core_idx_1;
*first_core_after = cores_size;
for (uint64_t j=core_idx_1; j<cores_size; j++) {
if (end_loc <= cores[j].start) {
*first_core_after = j;
return;
}
}
}
void refine_seqs(struct ref_seq *seqs, int no_overlap) {
if (no_overlap) {
for (int i=0; i<seqs->size; i++) {
for (int j=1; j<seqs->chrs[i].cores_size; j++) {
if (seqs->chrs[i].cores[j].start < seqs->chrs[i].cores[j-1].end) {
seqs->chrs[i].cores[j].start = seqs->chrs[i].cores[j-1].end;
}
}
}
}
}
void refine_seq(struct lps *str, int no_overlap) {
if (no_overlap) {
for (int j=1; j<str->size; j++) {
if (str->cores[j].start < str->cores[j-1].end) {
str->cores[j].start = str->cores[j-1].end;
}
}
}
}
void print_path(const struct ref_seq *seqs, FILE *out) {
for (int i=0; i<seqs->size; i++) {
if (seqs->chrs[i].cores_size) {
const struct chr *chrom = seqs->chrs + i;
// print Path (P)
fprintf(out, "P\t%s\t", chrom->seq_name);
// print first core
if (chrom->ids != NULL && chrom->ids[0] != NULL) { // if first one is not NULL
fprintf(out, "%lu+", chrom->ids[0][0]);
int index = 1;
while (chrom->ids[0][index]) {
fprintf(out, ",%lu+", chrom->ids[0][index++]);
}
fprintf(out, ",%lu+", chrom->cores[0].id);
} else {
fprintf(out, "%lu+", chrom->cores[0].id);
}
// print rest
if (chrom->ids != NULL) {
for (int j=1; j<chrom->cores_size; j++) {
if (chrom->ids[j] != NULL) {
int index = 0;
while (chrom->ids[j][index]) {
fprintf(out, ",%lu+", chrom->ids[j][index++]);
}
}
fprintf(out, ",%lu+", chrom->cores[j].id);
}
} else {
for (int j=1; j<chrom->cores_size; j++) {
fprintf(out, ",%lu+", chrom->cores[j].id);
}
}
// print cigar
fprintf(out, "\t*\n");
}
}
}