diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8855b5e --- /dev/null +++ b/.gitignore @@ -0,0 +1,29 @@ +*.gem +*.rbc +.bundle +.config +.yardoc +Gemfile.lock +InstalledFiles +_yardoc +vendor +coverage +doc/ +lib/bundler/man +pkg +rdoc +spec/reports +test/tmp +test/version_tmp +tmp + +# JetBrains +.idea + +# mac +.DS_Store +__MACOSX + +# emacs turds +(.*/)?\#[^/]*\#$ + diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index 5fcc440..0000000 --- a/Gemfile.lock +++ /dev/null @@ -1,34 +0,0 @@ -PATH - remote: . - specs: - apfel (0.0.4) - -GEM - remote: https://rubygems.org/ - specs: - coderay (1.0.8) - diff-lcs (1.1.3) - method_source (0.8.1) - pry (0.9.10) - coderay (~> 1.0.5) - method_source (~> 0.8) - slop (~> 3.3.1) - rake (0.9.2.2) - rspec (2.12.0) - rspec-core (~> 2.12.0) - rspec-expectations (~> 2.12.0) - rspec-mocks (~> 2.12.0) - rspec-core (2.12.0) - rspec-expectations (2.12.0) - diff-lcs (~> 1.1.3) - rspec-mocks (2.12.0) - slop (3.3.3) - -PLATFORMS - ruby - -DEPENDENCIES - apfel! - pry - rake - rspec diff --git a/lib/apfel.rb b/lib/apfel.rb index 46c2330..55295d1 100644 --- a/lib/apfel.rb +++ b/lib/apfel.rb @@ -7,10 +7,12 @@ module Apfel # strings object def self.parse(file) file = read(file) + # confirmed that read does remove the first comment in utf DotStringsParser.new(file).parse_file end def self.read(file) - Reader.read(file) + # confirmed that read does remove the first comment in utf + Reader.read(file) end end diff --git a/lib/apfel/line.rb b/lib/apfel/line.rb index 500cac5..f2537a6 100644 --- a/lib/apfel/line.rb +++ b/lib/apfel/line.rb @@ -61,12 +61,47 @@ def key def value if key_value_pair? - cleaned_content.partition(/"\s*=\s*"/)[2].gsub!(/(^"|"$)/, "") + unescape_value cleaned_content.partition(/"\s*=\s*"/)[2].gsub!(/(^"|"$)/, "") end end def is_comment? whole_comment? || open_comment? || close_comment? || in_comment end + + private + + # http://developer.apple.com/library/mac/#documentation/cocoa/conceptual/LoadingResources/Strings/Strings.html + def unescape_value(string) + state = :normal + out = '' + string.each_char do |c| + case state + when :normal + if c == '\\' + state = :escape + else + out += c + end + when :escape + state = :normal + case c + when '\\' + out += '\\' + when '"' + out += '"' + when 'r' + out += "\r" + when 'n' + out += "\n" + when 't' + out += "\t" + else + out += '\\' + c # Do nothing, however in the future handling unicode escapes could be good + end + end + end + out + end end end diff --git a/lib/apfel/reader.rb b/lib/apfel/reader.rb index 0e26838..bc792eb 100644 --- a/lib/apfel/reader.rb +++ b/lib/apfel/reader.rb @@ -4,9 +4,12 @@ class Reader # Reads in a file and returns an array consisting of each line of input # cleaned of new line characters def self.read(file) - File.open(file, "r") do |f| - content_array=[] - content = f.read + File.open(file, 'r') do |f| + content_array=[] + # http://stackoverflow.com/questions/5011504/is-there-a-way-to-remove-the-bom-from-a-utf-8-encoded-file + # problem is the BOM that can be found at char 0 in strings files + content = f.read.force_encoding('UTF-8') + content.sub!("\xEF\xBB\xBF".force_encoding("UTF-8"), '') content.each_line do |line| line.gsub!("\n","") content_array.push(line) diff --git a/spec/apfel_escapes_spec.rb b/spec/apfel_escapes_spec.rb new file mode 100644 index 0000000..d1ac7cf --- /dev/null +++ b/spec/apfel_escapes_spec.rb @@ -0,0 +1,32 @@ +require 'spec_helper' +require 'apfel' + +describe Apfel do + describe '::parse_file' do + context 'when given DotStrings file with escapes'do + let(:parsed_file_hash) do + Apfel.parse('./spec/files/escapes.strings').to_hash(:with_comments => false) + end + + it 'should parse nl' do + parsed_file_hash['multiline'].should eq "line 1\nline 2" + end + + it 'should parse cr' do + parsed_file_hash['mac'].should eq "before cr\rafter cr" + end + + it 'should parse tabs' do + parsed_file_hash['tabs'].should eq "two spaces \t equals tab" + end + + it 'should parse double quotes' do + parsed_file_hash['dq'].should eq "\"someone said this\"" + end + + it 'should parse backslashes' do + parsed_file_hash['backslash'].should eq "\\not a forward slash" + end + end + end +end diff --git a/spec/apfel_parse_ascii_file_spec.rb b/spec/apfel_parse_ascii_file_spec.rb new file mode 100644 index 0000000..c0f4411 --- /dev/null +++ b/spec/apfel_parse_ascii_file_spec.rb @@ -0,0 +1,50 @@ +require 'spec_helper' +require 'apfel' +require 'apfel/parsed_dot_strings' + +describe Apfel do + describe '::parse_file' do + context 'when given a ASCII DotStrings file'do + + it 'the file should be ascii' do + res = `file -I ./spec/files/ascii.strings` + encoding = res.split(/=/).last.gsub!("\n",'') + encoding.should == 'us-ascii' + end + + let(:parsed_file) do + Apfel.parse('./spec/files/ascii.strings') + end + + it 'returns a ParsedDotStrings object' do + parsed_file.should be_a(Apfel::ParsedDotStrings) + end + + #it 'should have the correct keys' do + # parsed_file.keys.should include 'key_number_one' + # parsed_file.keys.should include 'key_number_two' + # parsed_file.keys.should include 'key_number_three' + #end + # + #it 'should have the correct values' do + # parsed_file.values.should include 'value number one' + # parsed_file.values.should include 'value number two' + # parsed_file.values.should include 'value number three' + #end + # + describe 'should have the correct comments' do + it 'should have the correct comment for avoided social event' do + parsed_file.comments['avoided social event'].should == 'No comment provided by engineer.' + end + + it 'should have the correct comment for binged' do + parsed_file.comments['binged'].should == 'No comment provided by engineer.' + end + + it 'should have the correct comment for called a friend' do + parsed_file.comments['called a friend'].should == 'No comment provided by engineer.' + end + end + end + end +end diff --git a/spec/apfel_parse_utf8_file_spec.rb b/spec/apfel_parse_utf8_file_spec.rb new file mode 100644 index 0000000..064476b --- /dev/null +++ b/spec/apfel_parse_utf8_file_spec.rb @@ -0,0 +1,53 @@ +require 'spec_helper' +require 'apfel' +require 'apfel/parsed_dot_strings' + +describe Apfel do + describe '::parse_file' do + context 'when given a UTF8 DotStrings file'do + + it 'the file should be utf-8' do + File.open('./spec/files/utf8.strings', 'r') do |f| + f.external_encoding.name.should == 'UTF-8' + content = f.read + content.encoding.name.should == 'UTF-8' + end + end + + let(:parsed_file) do + Apfel.parse('./spec/files/utf8.strings') + end + + it 'returns a ParsedDotStrings object' do + parsed_file.should be_a(Apfel::ParsedDotStrings) + end + + #it 'should have the correct keys' do + # parsed_file.keys.should include 'key_number_one' + # parsed_file.keys.should include 'key_number_two' + # parsed_file.keys.should include 'key_number_three' + #end + # + #it 'should have the correct values' do + # parsed_file.values.should include 'value number one' + # parsed_file.values.should include 'value number two' + # parsed_file.values.should include 'value number three' + #end + # + describe 'should have the correct comments' do + it 'should have the correct comment for anger' do + parsed_file.comments['anger'].should == 'No comment provided by engineer.' + end + + it 'should have the correct comment for anxiety' do + parsed_file.comments['anxiety'].should == 'No comment provided by engineer.' + end + + it 'should have the correct comment for boredom' do + parsed_file.comments['boredom'].should == 'No comment provided by engineer.' + end + end + + end + end +end diff --git a/spec/apfel_spec.rb b/spec/apfel_spec.rb index 77a753d..a96203b 100644 --- a/spec/apfel_spec.rb +++ b/spec/apfel_spec.rb @@ -4,7 +4,7 @@ describe Apfel do describe '::parse_file' do - context 'when given a valid DotStrings file'do + context 'when given an ASCII DotStrings file'do let(:parsed_file) do Apfel.parse(valid_file) @@ -13,16 +13,43 @@ it 'returns a ParsedDotStrings object' do parsed_file.should be_a(Apfel::ParsedDotStrings) end + + it 'should have the correct keys' do + parsed_file.keys.should include 'key_number_one' + parsed_file.keys.should include 'key_number_two' + parsed_file.keys.should include 'key_number_three' + end + + it 'should have the correct values' do + parsed_file.values.should include 'value number one' + parsed_file.values.should include 'value number two' + parsed_file.values.should include 'value number three' + end + + describe 'should have the correct comments' do + it 'should have the correct comment for first' do + parsed_file.comments(with_keys: false).should include 'This is the first comment' + end + + it 'should have the correct comment for second' do + parsed_file.comments['key_number_two'].should == 'This is a multiline comment' + end + + + it 'should have the correct comment for third' do + parsed_file.comments(with_keys: false).should include 'This is comment number 3' + end + end end context 'when given an invalid strings file' do context 'missing a semicolon' do let(:invalid_file_semicolon) do - create_temp_file( <<-EOS + create_temp_file('ascii', <<-EOS /* This is the first comment */ "key_number_one" = "value number one" - EOS + EOS ) end @@ -35,7 +62,7 @@ context 'not closed comment' do let(:invalid_file_comment) do - create_temp_file(<<-EOS + create_temp_file('ascii', <<-EOS /* This is the first comment "key_number_one" = "value number one"; diff --git a/spec/files/ascii.strings b/spec/files/ascii.strings new file mode 100644 index 0000000..4508787 --- /dev/null +++ b/spec/files/ascii.strings @@ -0,0 +1,9 @@ +/* No comment provided by engineer. */ +"avoided social event" = "avoided social event"; + +/* No comment provided by engineer. */ +"binged" = "binged"; + +/* No comment provided by engineer. */ +"called a friend" = "called a friend"; + diff --git a/spec/files/escapes.strings b/spec/files/escapes.strings new file mode 100644 index 0000000..f6c7d71 --- /dev/null +++ b/spec/files/escapes.strings @@ -0,0 +1,5 @@ +"multiline" = "line 1\nline 2"; +"mac" = "before cr\rafter cr"; +"tabs" = "two spaces \t equals tab"; +"dq" = "\"someone said this\""; +"backslash" = "\\not a forward slash"; diff --git a/spec/files/utf8.strings b/spec/files/utf8.strings new file mode 100644 index 0000000..40632e1 --- /dev/null +++ b/spec/files/utf8.strings @@ -0,0 +1,8 @@ +/* No comment provided by engineer. */ +"anger" = "anger"; + +/* No comment provided by engineer. */ +"anxiety" = "anxiety"; + +/* No comment provided by engineer. */ +"boredom" = "boredom"; diff --git a/spec/reader_spec.rb b/spec/reader_spec.rb index 123bff8..0d02c24 100644 --- a/spec/reader_spec.rb +++ b/spec/reader_spec.rb @@ -5,7 +5,7 @@ module Apfel describe Reader do describe '#read' do let(:temp_file) do - create_temp_file(<<-EOS + create_temp_file('ascii', <<-EOS This is a file with some lines. Roses are red, violets are blue. This text is really boring, diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index ed46613..5aee9b1 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,13 +1,17 @@ require 'tempfile' require 'json' -def create_temp_file(string) - temp_file = Tempfile.new('temp') + +# added encoding to see if could could reproduce the missing first comment in +# utf8 strings files - did not reproduce the problem +# SOLVED: it was the BOM at char 0 in the strings file +def create_temp_file(encoding, string) + temp_file = Tempfile.new([encoding, 'temp']) temp_file << string temp_file.flush end -def valid_file - create_temp_file(<<-EOS +def valid_file(encoding='ascii') + create_temp_file(encoding, <<-EOS /* This is the first comment */ "key_number_one" = "value number one"; @@ -18,6 +22,6 @@ def valid_file "key_number_two" = "value number two"; /* This is comment number 3 */ "key_number_three" = " value number three "; - EOS + EOS ) end