-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathntriples2turtle.rb
More file actions
83 lines (73 loc) · 2.19 KB
/
ntriples2turtle.rb
File metadata and controls
83 lines (73 loc) · 2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Converts every N-Triples file of the PRIDE RDF dump into a compact Turtle file.
#
# For each input file the script
#   1. rewrites the file IN PLACE, stripping characters that are illegal in URIs,
#   2. pipes it through `rapper` (N-Triples -> Turtle), which reduces the file
#      size and doubles as a grammar check,
#   3. abbreviates PRIDE URIs to `pride:` prefixed names to shrink the output further.

# Absolute path of the Raptor command line tool used for the conversion.
RAPPER_BIN = "/usr/local/bin/rapper".freeze

prefix = "@prefix pride: <http://www.ebi.ac.uk/pride/rdf/> ."
out_folder = "/Volumes/orenostorage/PRIDE/turtle/"
files = Dir.glob("/Volumes/orenostorage/PRIDE/RDF/*")

#########################################################
# resume
#########################################################
# Uncomment to restart an interrupted run at a given accession:
#   skip = "PRD000659"
#   skip = files.index("/Volumes/orenostorage/PRIDE/RDF/#{skip}.nt")
#   files.shift(skip)

# test data
#files = ["/Volumes/orenostorage/PRIDE/RDF/PRD000594.nt"]

# Derives the Turtle file name from an N-Triples path.
#
# Only a trailing ".nt" extension is swapped for ".ttl"; an "nt" elsewhere in
# the name is left alone and names without that extension are returned as is.
#
# @param path [String] path of the N-Triples input file
# @return [String] bare file name of the Turtle output
def turtle_name(path)
  File.basename(path).sub(/\.nt\z/, ".ttl")
end

# Removes characters that are illegal in URIs from one N-Triples line.
#
# Steps, in order:
#   * the line terminator and every "|" are dropped, "\" becomes "/";
#   * if a "<...>" span contains ", ", every ", " on the line becomes "_and_";
#   * if a "<...>" span contains whitespace, ALL whitespace on the line becomes
#     "_" and the term separators are then restored (before each "<", before
#     the first '"', before the first "_:" and before the final ".").
#
# NOTE(review): the replacements act on the whole line, not just on URIs, so
# backslash escapes inside literals are altered and whitespace inside a literal
# stays replaced by "_" - confirm this is acceptable for the data.
#
# @param line [String] raw line, with or without trailing newline
# @return [String] cleaned line without trailing newline
def sanitize_line(line)
  line = line.chomp.delete("|").gsub(/\\/, "/")
  line = line.gsub(/,\s/, "_and_") if line =~ /<.+?,\s.+?>/
  if line =~ /<.+?\s.+?>/
    line = line.gsub(/\s/, "_")
    line = line.gsub(/_</, " <").sub(/_\"/, " \"").sub(/__:/, " _:").sub(/_\.$/, " .")
  end
  line
end

# Rewrites an N-Triples file in place with every line passed through
# {#sanitize_line}.
#
# The cleaned lines are streamed to a sibling scratch file ("<path>2") which
# then replaces the original, so the input is never held in memory as a whole.
#
# @param path [String] file to clean
# @return [void]
def sanitize_file(path)
  scratch = "#{path}2"
  File.open(scratch, "w") do |out|
    # File.foreach closes the input handle once iteration finishes.
    File.foreach(path) { |line| out.puts sanitize_line(line) }
  end
  File.rename(scratch, path)
end

# Abbreviates PRIDE URIs on a Turtle line to "pride:" prefixed names.
#
# Every "<http://www.ebi.ac.uk/pride/rdf/NAME>" on the line is rewritten, and
# the ">" that is removed is always the one closing that very URI. URIs whose
# NAME contains characters outside [A-Za-z0-9_:.-] (or ends in ".") are kept
# in full, because they would not form a valid prefixed name without escaping.
#
# @param line [String] one line of Turtle
# @return [String] the line with PRIDE URIs abbreviated where possible
def compact_pride_uris(line)
  line.gsub(
    %r{<http://www\.ebi\.ac\.uk/pride/rdf/((?:[A-Za-z0-9_:](?:[A-Za-z0-9_:.-]*[A-Za-z0-9_:-])?)?)>},
    'pride:\1'
  )
end

# Converts an N-Triples file to Turtle with rapper and writes the result,
# preceded by the prefix declaration, to +target+.
#
# rapper is started with an argument vector (no shell), so paths containing
# spaces or shell metacharacters are passed through untouched, and its output
# is consumed line by line instead of being buffered completely.
#
# @param path [String] N-Triples input file
# @param target [String] Turtle output file (overwritten)
# @param prefix [String] "@prefix" line written at the top of the output
# @return [void]
def convert_to_turtle(path, target, prefix)
  IO.popen([RAPPER_BIN, "-i", "ntriples", "-o", "turtle", path]) do |rapper|
    File.open(target, "w") do |out|
      out.puts prefix
      rapper.each_line { |line| out.puts compact_pride_uris(line.chomp) }
    end
  end
  # IO.popen with a block sets $? to the exit status of the child process.
  warn "rapper exited with #{$?.exitstatus.inspect} for #{path}; #{target} may be incomplete" unless $?.success?
end

files.each do |file|
  accession = turtle_name(file)
  puts accession

  # delete illegal characters in URI
  # (an earlier variant read and rewrote the whole file through a single
  # handle; it was dropped, apparently because of errors with data > 2 GB)
  sanitize_file(file)

  # convert ntriples format to turtle format in order to reduce a file size and check a grammar,
  # then replace URL into prefix:value in order to reduce file size
  convert_to_turtle(file, File.join(out_folder, accession), prefix)
end