Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some encoding issues within Diff::LCS::Hunk #15

Merged
merged 5 commits into from
Mar 30, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 52 additions & 20 deletions lib/diff/lcs/hunk.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class Diff::LCS::Hunk
def initialize(data_old, data_new, piece, flag_context, file_length_difference)
# At first, a hunk will have just one Block in it
@blocks = [ Diff::LCS::Block.new(piece) ]
@preferred_data_encoding = data_old[0].encoding if String.method_defined?(:encoding)
@data_old = data_old
@data_new = data_new

Expand Down Expand Up @@ -120,19 +121,19 @@ def old_diff
# Calculate item number range. Old diff range is just like a context
# diff range, except the ranges are on one line with the action between
# them.
s = "#{context_range(:old)}#{op_act[block.op]}#{context_range(:new)}\n"
s = encode("#{context_range(:old)}#{op_act[block.op]}#{context_range(:new)}\n")
# If removing anything, just print out all the remove lines in the hunk
# which is just all the remove lines in the block.
@data_old[@start_old .. @end_old].each { |e| s << "< #{e}\n" } unless block.remove.empty?
s << "---\n" if block.op == "!"
@data_new[@start_new .. @end_new].each { |e| s << "> #{e}\n" } unless block.insert.empty?
@data_old[@start_old .. @end_old].each { |e| s << encode("< ") + e + encode("\n") } unless block.remove.empty?
s << encode("---\n") if block.op == "!"
@data_new[@start_new .. @end_new].each { |e| s << encode("> ") + e + encode("\n") } unless block.insert.empty?
s
end
private :old_diff

def unified_diff
# Calculate item number range.
s = "@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n"
s = encode("@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n")

# Outlist starts containing the hunk of the old file. Removing an item
# just means putting a '-' in front of it. Inserting an item requires
Expand All @@ -145,54 +146,54 @@ def unified_diff
# file -- don't take removed items into account.
lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0

outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
outlist = @data_old[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') }

@blocks.each do |block|
block.remove.each do |item|
op = item.action.to_s # -
offset = item.position - lo + num_added
outlist[offset].gsub!(/^ /, op.to_s)
match_encoding_gsub!(outlist[offset],'^ ', op.to_s)
num_removed += 1
end
block.insert.each do |item|
op = item.action.to_s # +
offset = item.position - @start_new + num_removed
outlist[offset, 0] = "#{op}#{@data_new[item.position]}"
outlist[offset, 0] = encode(op) + @data_new[item.position]
num_added += 1
end
end

s << outlist.join("\n")
s << outlist.join(encode("\n"))
end
private :unified_diff

def context_diff
s = "***************\n"
s << "*** #{context_range(:old)} ****\n"
s = encode("***************\n")
s << encode("*** #{context_range(:old)} ****\n")
r = context_range(:new)

# Print out file 1 part for each block in context diff format if there
# are any blocks that remove items
lo, hi = @start_old, @end_old
removes = @blocks.select { |e| not e.remove.empty? }
if removes
outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
outlist = @data_old[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') }
removes.each do |block|
block.remove.each do |item|
outlist[item.position - lo].gsub!(/^ /) { block.op } # - or !
match_encoding_gsub!( outlist[item.position - lo], '^ ') { block.op } # - or !
end
end
s << outlist.join("\n")
end

s << "\n--- #{r} ----\n"
s << encode("\n--- #{r} ----\n")
lo, hi = @start_new, @end_new
inserts = @blocks.select { |e| not e.insert.empty? }
if inserts
outlist = @data_new[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
outlist = @data_new[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') }
inserts.each do |block|
block.insert.each do |item|
outlist[item.position - lo].gsub!(/^ /) { block.op } # + or !
match_encoding_gsub!( outlist[item.position - lo], '^ ') { block.op } # + or !
end
end
s << outlist.join("\n")
Expand All @@ -206,14 +207,14 @@ def ed_diff(format)
warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1

if format == :reverse_ed
s = "#{op_act[@blocks[0].op]}#{context_range(:old)}\n"
s = encode("#{op_act[@blocks[0].op]}#{context_range(:old)}\n")
else
s = "#{context_range(:old).gsub(/,/, ' ')}#{op_act[@blocks[0].op]}\n"
s = encode("#{match_encoding_gsub(context_range(:old), ',', ' ')}#{op_act[@blocks[0].op]}\n")
end

unless @blocks[0].insert.empty?
@data_new[@start_new .. @end_new].each { |e| s << "#{e}\n" }
s << ".\n"
@data_new[@start_new .. @end_new].each { |e| s << e + encode("\n") }
s << encode(".\n")
end
s
end
Expand Down Expand Up @@ -249,4 +250,35 @@ def unified_range(mode)
(length == 1) ? "#{first}" : "#{first},#{length}"
end
private :unified_range

# Encoding helpers. Real transcoding is only defined when String supports
# #encoding; otherwise both helpers hand their input back untouched.
if String.method_defined?(:encoding)
  # Transcodes +literal+ to the encoding held in @preferred_data_encoding.
  #
  # literal:: the String to transcode.
  #
  # Returns the String produced by String#encode.
  def encode(literal)
    literal.encode @preferred_data_encoding
  end

  # Transcodes the elements of +args+ to the encoding of +string+.
  #
  # string:: the String whose encoding is the conversion target.
  # args::   an Array of arguments destined for String#gsub / String#gsub!.
  #
  # Elements that do not respond to #encode (for example a Regexp pattern
  # or a Hash of replacements, both of which String#gsub accepts) are
  # returned as they are instead of raising NoMethodError.
  #
  # Returns a new Array with the converted arguments.
  def encode_to(string, args)
    target = string.encoding
    args.map { |arg| arg.respond_to?(:encode) ? arg.encode(target) : arg }
  end
else
  # Without String#encoding there is nothing to convert.
  def encode(literal)
    literal
  end

  # Without String#encoding the arguments are returned unchanged.
  def encode_to(string, args)
    args
  end
end

private :encode
private :encode_to

# Non-destructive String#gsub wrapper: the gsub arguments are first run
# through encode_to against +string+, then applied together with the
# optional block.
#
# string:: the String to search.
# args::   the arguments that would normally be given to String#gsub.
#
# Returns the result of String#gsub; +string+ itself is not modified.
def match_encoding_gsub(string, *args, &block)
  converted = encode_to(string, args)
  string.gsub(*converted, &block)
end
private :match_encoding_gsub

# Destructive String#gsub! wrapper: the gsub! arguments are first run
# through encode_to against +string+, then applied in place together with
# the optional block.
#
# string:: the String to modify.
# args::   the arguments that would normally be given to String#gsub!.
#
# Returns whatever String#gsub! returns (nil when nothing was replaced).
def match_encoding_gsub!(string, *args, &block)
  converted = encode_to(string, args)
  string.gsub!(*converted, &block)
end
private :match_encoding_gsub!

end
59 changes: 59 additions & 0 deletions spec/hunk_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# -*- ruby encoding: utf-8 -*-

require 'spec_helper'

# Examples exercising Diff::LCS::Hunk#diff with UTF-16LE encoded data in
# each of the supported output formats.
describe "Diff::LCS::Hunk" do
  # The fixtures rely on String#encode, so the examples are only defined
  # when String exposes #encoding.
  if String.method_defined?(:encoding)
    let(:old_data) { ["Tu avec carté {count} itém has".encode('UTF-16LE')] }
    let(:new_data) { ["Tu avec carte {count} item has".encode('UTF-16LE')] }
    let(:changes)  { Diff::LCS.diff(old_data, new_data) }
    let(:hunk)     { Diff::LCS::Hunk.new(old_data, new_data, changes[0], 3, 0) }

    # NOTE: the heredoc bodies below stay flush-left on purpose; <<- only
    # allows the terminator to be indented, content lines are taken verbatim.

    it 'should be able to produce a unified diff from the two peices' do
      wanted = <<-EOD.encode('UTF-16LE').chomp
@@ -1,2 +1,2 @@
Tu avec carté {count} itém has
+Tu avec carte {count} item has
EOD
      actual = hunk.diff(:unified).to_s
      expect(actual == wanted).to eql(true)
    end

    it 'should be able to produce a context diff from the two peices' do
      wanted = <<-EOD.encode('UTF-16LE').chomp
***************
*** 1,2 ****
Tu avec carté {count} itém has
--- 1,2 ----
Tu avec carte {count} item has
EOD
      actual = hunk.diff(:context).to_s
      expect(actual == wanted).to eql(true)
    end

    it 'should be able to produce an old diff from the two peices' do
      wanted = <<-EOD.encode('UTF-16LE').chomp
1,2c1,2
< Tu avec carté {count} itém has
---
> Tu avec carte {count} item has

EOD
      actual = hunk.diff(:old).to_s
      expect(actual == wanted).to eql(true)
    end

    it 'should be able to produce a reverse ed diff from the two peices' do
      wanted = <<-EOD.encode('UTF-16LE').chomp
c1,2
Tu avec carte {count} item has
.

EOD
      actual = hunk.diff(:reverse_ed).to_s
      expect(actual == wanted).to eql(true)
    end
  end
end