From 0f02de0dd32279017b5fb5ee2d201b874f64af7b Mon Sep 17 00:00:00 2001 From: Heejae Chang Date: Fri, 14 Oct 2016 11:10:43 -0700 Subject: [PATCH 1/4] review only --- .../Core/Portable/GlobalSuppressions.cs | 1 + .../InternalUtilities/EncodingExtensions.cs | 26 +++-- .../Core/Portable/PublicAPI.Unshipped.txt | 1 + src/Compilers/Core/Portable/Text/LargeText.cs | 95 ++++++++++++------- .../Core/Portable/Text/SourceText.cs | 49 +++++++++- .../TemporaryStorageServiceFactory.cs | 25 ++++- .../TextFactory/DesktopTextFactoryService.cs | 7 ++ 7 files changed, 158 insertions(+), 46 deletions(-) diff --git a/src/Compilers/Core/Portable/GlobalSuppressions.cs b/src/Compilers/Core/Portable/GlobalSuppressions.cs index 81a04ac25c91d..e05dbf22d3d2a 100644 --- a/src/Compilers/Core/Portable/GlobalSuppressions.cs +++ b/src/Compilers/Core/Portable/GlobalSuppressions.cs @@ -11,5 +11,6 @@ // required paramter type is different for all overloads and so there is no ambiguity. [assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("ApiDesign", "RS0026:Do not add multiple public overloads with optional parameters", Justification = "", Scope = "member", Target = "~M:Microsoft.CodeAnalysis.Text.SourceText.From(System.IO.Stream,System.Text.Encoding,Microsoft.CodeAnalysis.Text.SourceHashAlgorithm,System.Boolean,System.Boolean)~Microsoft.CodeAnalysis.Text.SourceText")] [assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("ApiDesign", "RS0026:Do not add multiple public overloads with optional parameters", Justification = "", Scope = "member", Target = "~M:Microsoft.CodeAnalysis.Text.SourceText.From(System.Byte[],System.Int32,System.Text.Encoding,Microsoft.CodeAnalysis.Text.SourceHashAlgorithm,System.Boolean,System.Boolean)~Microsoft.CodeAnalysis.Text.SourceText")] +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("ApiDesign", "RS0026:Do not add multiple public overloads with optional parameters", Justification = "", Scope = "member", Target = "~M:Microsoft.CodeAnalysis.Text.SourceText.From(System.IO.TextReader,System.Int32,System.Text.Encoding,Microsoft.CodeAnalysis.Text.SourceHashAlgorithm)~Microsoft.CodeAnalysis.Text.SourceText")] [assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("ApiDesign", "RS0027:Public API with optional parameter(s) should have the most parameters amongst its public overloads.", Justification = "", Scope = "member", Target = "~M:Microsoft.CodeAnalysis.Text.SourceText.From(System.String,System.Text.Encoding,Microsoft.CodeAnalysis.Text.SourceHashAlgorithm)~Microsoft.CodeAnalysis.Text.SourceText")] diff --git a/src/Compilers/Core/Portable/InternalUtilities/EncodingExtensions.cs b/src/Compilers/Core/Portable/InternalUtilities/EncodingExtensions.cs index b039b6806074e..9d9c7a7bb3426 100644 --- a/src/Compilers/Core/Portable/InternalUtilities/EncodingExtensions.cs +++ b/src/Compilers/Core/Portable/InternalUtilities/EncodingExtensions.cs @@ -19,11 +19,29 @@ internal static int GetMaxCharCountOrThrowIfHuge(this Encoding encoding, Stream Debug.Assert(stream.CanSeek); long length = stream.Length; + int maxCharCount; + if (encoding.TryGetMaxCharCount(length, out maxCharCount)) + { + return maxCharCount; + } + +#if WORKSPACE_DESKTOP + throw new IOException(WorkspacesResources.Stream_is_too_long); +#else + throw new IOException(CodeAnalysisResources.StreamIsTooLong); +#endif + } + + internal static bool TryGetMaxCharCount(this Encoding encoding, long length, out int maxCharCount) + { + maxCharCount = 0; + if (length <= int.MaxValue) { try { - return encoding.GetMaxCharCount((int)length); + maxCharCount = encoding.GetMaxCharCount((int)length); + return true; } catch (ArgumentOutOfRangeException) { @@ -33,11 +51,7 @@ internal static int GetMaxCharCountOrThrowIfHuge(this Encoding encoding, Stream } } -#if WORKSPACE_DESKTOP - throw new IOException(WorkspacesResources.Stream_is_too_long); -#else - throw new IOException(CodeAnalysisResources.StreamIsTooLong); -#endif + return false; } } } diff --git a/src/Compilers/Core/Portable/PublicAPI.Unshipped.txt b/src/Compilers/Core/Portable/PublicAPI.Unshipped.txt index 25056ff072876..46be8ace2abdb 100644 --- a/src/Compilers/Core/Portable/PublicAPI.Unshipped.txt +++ b/src/Compilers/Core/Portable/PublicAPI.Unshipped.txt @@ -813,6 +813,7 @@ static Microsoft.CodeAnalysis.SeparatedSyntaxList.implicit operator Micro static Microsoft.CodeAnalysis.SeparatedSyntaxList.implicit operator Microsoft.CodeAnalysis.SeparatedSyntaxList(Microsoft.CodeAnalysis.SeparatedSyntaxList nodes) -> Microsoft.CodeAnalysis.SeparatedSyntaxList static Microsoft.CodeAnalysis.Text.SourceText.From(System.IO.Stream stream, System.Text.Encoding encoding = null, Microsoft.CodeAnalysis.Text.SourceHashAlgorithm checksumAlgorithm = Microsoft.CodeAnalysis.Text.SourceHashAlgorithm.Sha1, bool throwIfBinaryDetected = false, bool canBeEmbedded = false) -> Microsoft.CodeAnalysis.Text.SourceText static Microsoft.CodeAnalysis.Text.SourceText.From(System.IO.Stream stream, System.Text.Encoding encoding, Microsoft.CodeAnalysis.Text.SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected) -> Microsoft.CodeAnalysis.Text.SourceText +static Microsoft.CodeAnalysis.Text.SourceText.From(System.IO.TextReader reader, int length, System.Text.Encoding encoding = null, Microsoft.CodeAnalysis.Text.SourceHashAlgorithm checksumAlgorithm = Microsoft.CodeAnalysis.Text.SourceHashAlgorithm.Sha1) -> Microsoft.CodeAnalysis.Text.SourceText static Microsoft.CodeAnalysis.Text.SourceText.From(byte[] buffer, int length, System.Text.Encoding encoding = null, Microsoft.CodeAnalysis.Text.SourceHashAlgorithm checksumAlgorithm = Microsoft.CodeAnalysis.Text.SourceHashAlgorithm.Sha1, bool throwIfBinaryDetected = false, bool canBeEmbedded = false) -> Microsoft.CodeAnalysis.Text.SourceText static Microsoft.CodeAnalysis.Text.SourceText.From(byte[] buffer, int length, System.Text.Encoding encoding, Microsoft.CodeAnalysis.Text.SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected) -> Microsoft.CodeAnalysis.Text.SourceText virtual Microsoft.CodeAnalysis.Diagnostics.AnalysisContext.RegisterOperationAction(System.Action action, System.Collections.Immutable.ImmutableArray operationKinds) -> void diff --git a/src/Compilers/Core/Portable/Text/LargeText.cs b/src/Compilers/Core/Portable/Text/LargeText.cs index 11941bc67f3fc..7d910b73eb67b 100644 --- a/src/Compilers/Core/Portable/Text/LargeText.cs +++ b/src/Compilers/Core/Portable/Text/LargeText.cs @@ -32,6 +32,7 @@ internal LargeText(ImmutableArray chunks, Encoding encoding, ImmutableAr _chunks = chunks; _encoding = encoding; _chunkStartOffsets = new int[chunks.Length]; + int offset = 0; for (int i = 0; i < chunks.Length; i++) { @@ -42,6 +43,11 @@ internal LargeText(ImmutableArray chunks, Encoding encoding, ImmutableAr _length = offset; } + internal LargeText(ImmutableArray chunks, Encoding encoding, SourceHashAlgorithm checksumAlgorithm) + : this(chunks, encoding, default(ImmutableArray), checksumAlgorithm, default(ImmutableArray)) + { + } + internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected, bool canBeEmbedded) { stream.Seek(0, SeekOrigin.Begin); @@ -58,48 +64,69 @@ internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAl using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: Math.Min(length, 4096), leaveOpen: true)) { - ArrayBuilder chunks = ArrayBuilder.GetInstance(1 + maxCharRemainingGuess / ChunkSize); - while (!reader.EndOfStream) - { - var nextChunkSize = ChunkSize; - if (maxCharRemainingGuess < ChunkSize) - { - // maxCharRemainingGuess typically overestimates a little - // so we will first fill a slightly smaller (maxCharRemainingGuess - 64) chunk - // and then use 64 char tail, which is likley to be resized. - nextChunkSize = Math.Max(maxCharRemainingGuess - 64, 64); - } + var chunks = ReadFromTextReader(reader, maxCharRemainingGuess, throwIfBinaryDetected); - char[] chunk = new char[nextChunkSize]; + // We must compute the checksum and embedded text blob now while we still have the original bytes in hand. + // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip. + var checksum = CalculateChecksum(stream, checksumAlgorithm); + var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default(ImmutableArray); + return new LargeText(chunks, reader.CurrentEncoding, checksum, checksumAlgorithm, embeddedTextBlob); + } + } - int charsRead = reader.ReadBlock(chunk, 0, chunk.Length); - if (charsRead == 0) - { - break; - } + internal static SourceText Decode(TextReader reader, int length, Encoding encoding, SourceHashAlgorithm checksumAlgorithm) + { + if (length == 0) + { + return SourceText.From(string.Empty, encoding, checksumAlgorithm); + } - maxCharRemainingGuess -= charsRead; + // throwIfBinaryDetected == false since we are given text reader from the beginning + var chunks = ReadFromTextReader(reader, length, throwIfBinaryDetected: false); - if (charsRead < chunk.Length) - { - Array.Resize(ref chunk, charsRead); - } + return new LargeText(chunks, encoding, checksumAlgorithm); + } - // Check for binary files - if (throwIfBinaryDetected && IsBinary(chunk)) - { - throw new InvalidDataException(); - } + private static ImmutableArray ReadFromTextReader(TextReader reader, int maxCharRemainingGuess, bool throwIfBinaryDetected) + { + var chunks = ArrayBuilder.GetInstance(1 + maxCharRemainingGuess / ChunkSize); - chunks.Add(chunk); + while (reader.Peek() != -1) + { + var nextChunkSize = ChunkSize; + if (maxCharRemainingGuess < ChunkSize) + { + // maxCharRemainingGuess typically overestimates a little + // so we will first fill a slightly smaller (maxCharRemainingGuess - 64) chunk + // and then use 64 char tail, which is likley to be resized. + nextChunkSize = Math.Max(maxCharRemainingGuess - 64, 64); } - // We must compute the checksum and embedded text blob now while we still have the original bytes in hand. - // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip. - var checksum = CalculateChecksum(stream, checksumAlgorithm); - var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default(ImmutableArray); - return new LargeText(chunks.ToImmutableAndFree(), reader.CurrentEncoding, checksum, checksumAlgorithm, embeddedTextBlob); + char[] chunk = new char[nextChunkSize]; + + int charsRead = reader.ReadBlock(chunk, 0, chunk.Length); + if (charsRead == 0) + { + break; + } + + maxCharRemainingGuess -= charsRead; + + if (charsRead < chunk.Length) + { + Array.Resize(ref chunk, charsRead); + } + + // Check for binary files + if (throwIfBinaryDetected && IsBinary(chunk)) + { + throw new InvalidDataException(); + } + + chunks.Add(chunk); } + + return chunks.ToImmutableAndFree(); } /// @@ -272,7 +299,7 @@ private int[] ParseLineStarts() case '\u0085': case '\u2028': case '\u2029': - line_break: + line_break: arrayBuilder.Add(position); position = index; break; diff --git a/src/Compilers/Core/Portable/Text/SourceText.cs b/src/Compilers/Core/Portable/Text/SourceText.cs index 1ddbdfe67b926..047c3a835e3e5 100644 --- a/src/Compilers/Core/Portable/Text/SourceText.cs +++ b/src/Compilers/Core/Portable/Text/SourceText.cs @@ -29,7 +29,7 @@ public abstract class SourceText private TextLineCollection _lazyLineInfo; private ImmutableArray _lazyChecksum; private ImmutableArray _precomputedEmbeddedTextBlob; - + private static readonly Encoding s_utf8EncodingWithNoBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false); protected SourceText(ImmutableArray checksum = default(ImmutableArray), SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1, SourceTextContainer container = null) @@ -97,6 +97,47 @@ public static SourceText From(string text, Encoding encoding = null, SourceHashA return new StringText(text, encoding, checksumAlgorithm: checksumAlgorithm); } + /// + /// Constructs a from text in a string. + /// + /// TextReader + /// length of content from + /// + /// Encoding of the file that the was read from or is going to be saved to. + /// null if the encoding is unspecified. + /// If the encoding is not specified the resulting isn't debuggable. + /// If an encoding-less is written to a file a shall be used as a default. + /// + /// + /// Hash algorithm to use to calculate checksum of the text that's saved to PDB. + /// + /// is null. + /// is not supported. + public static SourceText From( + TextReader reader, + int length, + Encoding encoding = null, + SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1) + { + if (reader == null) + { + throw new ArgumentNullException(nameof(reader)); + } + + ValidateChecksumAlgorithm(checksumAlgorithm); + + encoding = encoding ?? s_utf8EncodingWithNoBOM; + + // If the resulting string would end up on the large object heap, then use LargeEncodedText. + if (length >= LargeObjectHeapLimitInChars) + { + return LargeText.Decode(reader, length, encoding, checksumAlgorithm); + } + + string text = reader.ReadToEnd(); + return From(text, encoding, checksumAlgorithm); + } + // 1.0 BACKCOMPAT OVERLOAD - DO NOT TOUCH [EditorBrowsable(EditorBrowsableState.Never)] public static SourceText From(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected) @@ -192,9 +233,9 @@ public static SourceText From(byte[] buffer, int length, Encoding encoding, Sour /// If the given encoding is set to use a throwing decoder as a fallback /// Two consecutive NUL characters were detected in the decoded text and was true. public static SourceText From( - byte[] buffer, - int length, - Encoding encoding = null, + byte[] buffer, + int length, + Encoding encoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1, bool throwIfBinaryDetected = false, bool canBeEmbedded = false) diff --git a/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs b/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs index 641ba9d20ad3a..aba8ff314efd6 100644 --- a/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs +++ b/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs @@ -170,12 +170,13 @@ private unsafe TextReader CreateTextReaderFromTemporaryStorage(ISupportDirectMem return new DirectMemoryAccessStreamReader(src + 1, streamLength / sizeof(char) - 1); } - private unsafe class DirectMemoryAccessStreamReader : TextReader + private unsafe class DirectMemoryAccessStreamReader : TemporaryStorageTextReader { private char* _position; private readonly char* _end; - public DirectMemoryAccessStreamReader(char* src, int length) + public DirectMemoryAccessStreamReader(char* src, int length) : + base(length) { Debug.Assert(src != null); Debug.Assert(length >= 0); @@ -184,6 +185,16 @@ public DirectMemoryAccessStreamReader(char* src, int length) _end = _position + length; } + public override int Peek() + { + if (_position >= _end) + { + return -1; + } + + return *_position; + } + public override int Read() { if (_position >= _end) @@ -337,6 +348,16 @@ private async Task WriteStreamMaybeAsync(Stream stream, bool useAsync, Cancellat } } } + + internal class TemporaryStorageTextReader : TextReader + { + public TemporaryStorageTextReader(int length) + { + Length = length; + } + + public int Length { get; } + } } } diff --git a/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs b/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs index ad81a858cd7e7..9a35189227b26 100644 --- a/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs +++ b/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs @@ -21,6 +21,13 @@ internal class DesktopTextFactoryService : ITextFactoryService public SourceText CreateText(TextReader reader, Encoding encoding, CancellationToken cancellationToken = default(CancellationToken)) { cancellationToken.ThrowIfCancellationRequested(); + + var temporaryStorageReader = reader as TemporaryStorageServiceFactory.TemporaryStorageTextReader; + if (temporaryStorageReader != null) + { + return SourceText.From(temporaryStorageReader, temporaryStorageReader.Length, encoding); + } + return SourceText.From(reader.ReadToEnd(), encoding); } } From c8526d8d0c27aabe42bf4157a27eab3886128ef0 Mon Sep 17 00:00:00 2001 From: Heejae Chang Date: Fri, 14 Oct 2016 16:42:13 -0700 Subject: [PATCH 2/4] added unit tests --- .../CodeAnalysisTest/EmbeddedTextTests.cs | 32 +++++++++++++++- .../CodeAnalysisTest/Text/LargeTextTests.cs | 18 +++++++++ .../CodeAnalysisTest/Text/SourceTextTests.cs | 37 +++++++++++++++++++ 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/src/Compilers/Core/CodeAnalysisTest/EmbeddedTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/EmbeddedTextTests.cs index 40b556b3867fd..e204c908a4277 100644 --- a/src/Compilers/Core/CodeAnalysisTest/EmbeddedTextTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/EmbeddedTextTests.cs @@ -10,8 +10,6 @@ using System.IO.Compression; using Roslyn.Test.Utilities; using System.Linq; -using System.Collections.Immutable; -using System.Reflection; namespace Microsoft.CodeAnalysis.UnitTests { @@ -194,6 +192,36 @@ public void FromSource_Large() AssertEx.Equal(Encoding.Unicode.GetPreamble().Concat(Encoding.Unicode.GetBytes(LargeSource)), Decompress(text.Blob.Skip(4))); } + [Fact] + public void FromTextReader_Small() + { + var expected = SourceText.From(SmallSource, Encoding.UTF8, SourceHashAlgorithm.Sha1); + var expectedEmbeded = EmbeddedText.FromSource("pathToSmall", expected); + + var actual = SourceText.From(new StringReader(SmallSource), SmallSource.Length, Encoding.UTF8, SourceHashAlgorithm.Sha1); + var actualEmbeded = EmbeddedText.FromSource(expectedEmbeded.FilePath, actual); + + Assert.Equal(expectedEmbeded.FilePath, actualEmbeded.FilePath); + Assert.Equal(expectedEmbeded.ChecksumAlgorithm, actualEmbeded.ChecksumAlgorithm); + AssertEx.Equal(expectedEmbeded.Checksum, actualEmbeded.Checksum); + AssertEx.Equal(expectedEmbeded.Blob, actualEmbeded.Blob); + } + + [Fact] + public void FromTextReader_Large() + { + var expected = SourceText.From(LargeSource, Encoding.UTF8, SourceHashAlgorithm.Sha1); + var expectedEmbeded = EmbeddedText.FromSource("pathToSmall", expected); + + var actual = SourceText.From(new StringReader(LargeSource), LargeSource.Length, Encoding.UTF8, SourceHashAlgorithm.Sha1); + var actualEmbeded = EmbeddedText.FromSource(expectedEmbeded.FilePath, actual); + + Assert.Equal(expectedEmbeded.FilePath, actualEmbeded.FilePath); + Assert.Equal(expectedEmbeded.ChecksumAlgorithm, actualEmbeded.ChecksumAlgorithm); + AssertEx.Equal(expectedEmbeded.Checksum, actualEmbeded.Checksum); + AssertEx.Equal(expectedEmbeded.Blob, actualEmbeded.Blob); + } + [Fact] public void FromSource_Precomputed() { diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/LargeTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/LargeTextTests.cs index 93f8afed4736d..5edf59caeeea6 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Text/LargeTextTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Text/LargeTextTests.cs @@ -29,6 +29,11 @@ private static SourceText CreateSourceText(Stream stream, Encoding encoding = nu return LargeText.Decode(stream, encoding ?? Encoding.UTF8, SourceHashAlgorithm.Sha1, throwIfBinaryDetected: true, canBeEmbedded: false); } + private static SourceText CreateSourceText(TextReader reader, int length, Encoding encoding = null) + { + return LargeText.Decode(reader, length, encoding ?? Encoding.UTF8, SourceHashAlgorithm.Sha1); + } + private const string HelloWorld = "Hello, world!"; [Fact] @@ -313,5 +318,18 @@ public void LinesGetText2() var data = CreateSourceText(text); Assert.Equal("foo", data.Lines[0].ToString()); } + + [Fact] + public void FromTextReader() + { + var expected = "foo"; + var expectedSourceText = CreateSourceText(expected); + + var actual = new StringReader(expected); + var actualSourceText = CreateSourceText(actual, expected.Length); + + Assert.Equal("foo", actualSourceText.Lines[0].ToString()); + Assert.Equal(expectedSourceText.GetChecksum(), actualSourceText.GetChecksum()); + } } } \ No newline at end of file diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/SourceTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/SourceTextTests.cs index f00e3239fa93d..3969a029c75e6 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Text/SourceTextTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Text/SourceTextTests.cs @@ -229,6 +229,43 @@ public void FromThrowsIfBinary() Assert.Throws(() => SourceText.From(stream, throwIfBinaryDetected: true)); } + [Fact] + public void FromTextReader() + { + var expected = "Text reader source text test"; + var expectedSourceText = SourceText.From(expected); + + var actual = new StringReader(expected); + var actualSourceText = SourceText.From(actual, expected.Length); + + Assert.Equal(expectedSourceText.GetChecksum(), actualSourceText.GetChecksum()); + + var utf8NoBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); + + Assert.Same(s_utf8, SourceText.From(actual, expected.Length, s_utf8).Encoding); + Assert.Same(s_unicode, SourceText.From(actual, expected.Length, s_unicode).Encoding); + Assert.Equal(utf8NoBOM, SourceText.From(actual, expected.Length, null).Encoding); + } + + [Fact] + public void FromTextReader_Large() + { + var expected = new string('l', SourceText.LargeObjectHeapLimitInChars); + var expectedSourceText = SourceText.From(expected); + + var actual = new StringReader(expected); + var actualSourceText = SourceText.From(actual, expected.Length); + + Assert.IsType(actualSourceText); + Assert.Equal(expectedSourceText.GetChecksum(), actualSourceText.GetChecksum()); + + var utf8NoBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); + + Assert.Same(s_utf8, SourceText.From(actual, expected.Length, s_utf8).Encoding); + Assert.Same(s_unicode, SourceText.From(actual, expected.Length, s_unicode).Encoding); + Assert.Equal(utf8NoBOM, SourceText.From(actual, expected.Length, null).Encoding); + } + private static void TestTryReadByteOrderMark(Encoding expectedEncoding, int expectedPreambleLength, byte[] data) { TestTryReadByteOrderMark(expectedEncoding, expectedPreambleLength, data, data == null ? 0 : data.Length); From 32c80e871735c86ab02262f6704205ab11b43f57 Mon Sep 17 00:00:00 2001 From: Heejae Chang Date: Fri, 14 Oct 2016 17:33:43 -0700 Subject: [PATCH 3/4] fixed test failures. allow SourceText From TextReader to have null encoding lke the one from string. --- .../CodeAnalysisTest/Text/SourceTextTests.cs | 6 ++---- src/Compilers/Core/Portable/Text/LargeText.cs | 18 +++++++++--------- src/Compilers/Core/Portable/Text/SourceText.cs | 4 ---- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/SourceTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/SourceTextTests.cs index 3969a029c75e6..ed3805b3e684e 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Text/SourceTextTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Text/SourceTextTests.cs @@ -240,11 +240,9 @@ public void FromTextReader() Assert.Equal(expectedSourceText.GetChecksum(), actualSourceText.GetChecksum()); - var utf8NoBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); - Assert.Same(s_utf8, SourceText.From(actual, expected.Length, s_utf8).Encoding); Assert.Same(s_unicode, SourceText.From(actual, expected.Length, s_unicode).Encoding); - Assert.Equal(utf8NoBOM, SourceText.From(actual, expected.Length, null).Encoding); + Assert.Null(SourceText.From(actual, expected.Length, null).Encoding); } [Fact] @@ -263,7 +261,7 @@ public void FromTextReader_Large() Assert.Same(s_utf8, SourceText.From(actual, expected.Length, s_utf8).Encoding); Assert.Same(s_unicode, SourceText.From(actual, expected.Length, s_unicode).Encoding); - Assert.Equal(utf8NoBOM, SourceText.From(actual, expected.Length, null).Encoding); + Assert.Null(SourceText.From(actual, expected.Length, null).Encoding); } private static void TestTryReadByteOrderMark(Encoding expectedEncoding, int expectedPreambleLength, byte[] data) diff --git a/src/Compilers/Core/Portable/Text/LargeText.cs b/src/Compilers/Core/Portable/Text/LargeText.cs index 7d910b73eb67b..b27ffbbfa5b1f 100644 --- a/src/Compilers/Core/Portable/Text/LargeText.cs +++ b/src/Compilers/Core/Portable/Text/LargeText.cs @@ -24,13 +24,13 @@ internal sealed class LargeText : SourceText private readonly ImmutableArray _chunks; private readonly int[] _chunkStartOffsets; private readonly int _length; - private readonly Encoding _encoding; + private readonly Encoding _encodingOpt; - internal LargeText(ImmutableArray chunks, Encoding encoding, ImmutableArray checksum, SourceHashAlgorithm checksumAlgorithm, ImmutableArray embeddedTextBlob) + internal LargeText(ImmutableArray chunks, Encoding encodingOpt, ImmutableArray checksum, SourceHashAlgorithm checksumAlgorithm, ImmutableArray embeddedTextBlob) : base(checksum, checksumAlgorithm, embeddedTextBlob) { _chunks = chunks; - _encoding = encoding; + _encodingOpt = encodingOpt; _chunkStartOffsets = new int[chunks.Length]; int offset = 0; @@ -43,8 +43,8 @@ internal LargeText(ImmutableArray chunks, Encoding encoding, ImmutableAr _length = offset; } - internal LargeText(ImmutableArray chunks, Encoding encoding, SourceHashAlgorithm checksumAlgorithm) - : this(chunks, encoding, default(ImmutableArray), checksumAlgorithm, default(ImmutableArray)) + internal LargeText(ImmutableArray chunks, Encoding encodingOpt, SourceHashAlgorithm checksumAlgorithm) + : this(chunks, encodingOpt, default(ImmutableArray), checksumAlgorithm, default(ImmutableArray)) { } @@ -74,17 +74,17 @@ internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAl } } - internal static SourceText Decode(TextReader reader, int length, Encoding encoding, SourceHashAlgorithm checksumAlgorithm) + internal static SourceText Decode(TextReader reader, int length, Encoding encodingOpt, SourceHashAlgorithm checksumAlgorithm) { if (length == 0) { - return SourceText.From(string.Empty, encoding, checksumAlgorithm); + return SourceText.From(string.Empty, encodingOpt, checksumAlgorithm); } // throwIfBinaryDetected == false since we are given text reader from the beginning var chunks = ReadFromTextReader(reader, length, throwIfBinaryDetected: false); - return new LargeText(chunks, encoding, checksumAlgorithm); + return new LargeText(chunks, encodingOpt, checksumAlgorithm); } private static ImmutableArray ReadFromTextReader(TextReader reader, int maxCharRemainingGuess, bool throwIfBinaryDetected) @@ -173,7 +173,7 @@ public override char this[int position] } } - public override Encoding Encoding => _encoding; + public override Encoding Encoding => _encodingOpt; public override int Length => _length; diff --git a/src/Compilers/Core/Portable/Text/SourceText.cs b/src/Compilers/Core/Portable/Text/SourceText.cs index 047c3a835e3e5..4f3555f6b7381 100644 --- a/src/Compilers/Core/Portable/Text/SourceText.cs +++ b/src/Compilers/Core/Portable/Text/SourceText.cs @@ -124,10 +124,6 @@ public static SourceText From( throw new ArgumentNullException(nameof(reader)); } - ValidateChecksumAlgorithm(checksumAlgorithm); - - encoding = encoding ?? s_utf8EncodingWithNoBOM; - // If the resulting string would end up on the large object heap, then use LargeEncodedText. if (length >= LargeObjectHeapLimitInChars) { From f73dd868eb9c33c4ab09b6abc0576f62c71c6576 Mon Sep 17 00:00:00 2001 From: Heejae Chang Date: Mon, 17 Oct 2016 13:06:08 -0700 Subject: [PATCH 4/4] renamed some methods following suggestion. --- src/Compilers/Core/Portable/Text/LargeText.cs | 6 +- .../TemporaryStorageServiceFactory.cs | 123 +++++++++--------- .../TextFactory/DesktopTextFactoryService.cs | 2 +- 3 files changed, 62 insertions(+), 69 deletions(-) diff --git a/src/Compilers/Core/Portable/Text/LargeText.cs b/src/Compilers/Core/Portable/Text/LargeText.cs index b27ffbbfa5b1f..008e66758e88a 100644 --- a/src/Compilers/Core/Portable/Text/LargeText.cs +++ b/src/Compilers/Core/Portable/Text/LargeText.cs @@ -64,7 +64,7 @@ internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAl using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: Math.Min(length, 4096), leaveOpen: true)) { - var chunks = ReadFromTextReader(reader, maxCharRemainingGuess, throwIfBinaryDetected); + var chunks = ReadChunksFromTextReader(reader, maxCharRemainingGuess, throwIfBinaryDetected); // We must compute the checksum and embedded text blob now while we still have the original bytes in hand. // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip. @@ -82,12 +82,12 @@ internal static SourceText Decode(TextReader reader, int length, Encoding encodi } // throwIfBinaryDetected == false since we are given text reader from the beginning - var chunks = ReadFromTextReader(reader, length, throwIfBinaryDetected: false); + var chunks = ReadChunksFromTextReader(reader, length, throwIfBinaryDetected: false); return new LargeText(chunks, encodingOpt, checksumAlgorithm); } - private static ImmutableArray ReadFromTextReader(TextReader reader, int maxCharRemainingGuess, bool throwIfBinaryDetected) + private static ImmutableArray ReadChunksFromTextReader(TextReader reader, int maxCharRemainingGuess, bool throwIfBinaryDetected) { var chunks = ArrayBuilder.GetInstance(1 + maxCharRemainingGuess / ChunkSize); diff --git a/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs b/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs index aba8ff314efd6..9c2d447aa8b60 100644 --- a/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs +++ b/src/Workspaces/Core/Desktop/Workspace/Host/TemporaryStorage/TemporaryStorageServiceFactory.cs @@ -169,69 +169,6 @@ private unsafe TextReader CreateTextReaderFromTemporaryStorage(ISupportDirectMem return new DirectMemoryAccessStreamReader(src + 1, streamLength / sizeof(char) - 1); } - - private unsafe class DirectMemoryAccessStreamReader : TemporaryStorageTextReader - { - private char* _position; - private readonly char* _end; - - public DirectMemoryAccessStreamReader(char* src, int length) : - base(length) - { - Debug.Assert(src != null); - Debug.Assert(length >= 0); - - _position = src; - _end = _position + length; - } - - public override int Peek() - { - if (_position >= _end) - { - return -1; - } - - return *_position; - } - - public override int Read() - { - if (_position >= _end) - { - return -1; - } - - return *_position++; - } - - public override int Read(char[] buffer, int index, int count) - { - if (buffer == null) - { - throw new ArgumentNullException(nameof(buffer)); - } - - if (index < 0 || index >= buffer.Length) - { - throw new ArgumentOutOfRangeException(nameof(index)); - } - - if (count < 0 || (index + count) > buffer.Length) - { - throw new ArgumentOutOfRangeException(nameof(count)); - } - - count = Math.Min(count, (int)(_end - _position)); - if (count > 0) - { - Marshal.Copy((IntPtr)_position, buffer, index, count); - _position += count; - } - - return count; - } - } } private class TemporaryStreamStorage : ITemporaryStreamStorage, ITemporaryStorageWithName @@ -349,14 +286,70 @@ private async Task WriteStreamMaybeAsync(Stream stream, bool useAsync, Cancellat } } - internal class TemporaryStorageTextReader : TextReader + internal unsafe class DirectMemoryAccessStreamReader : TextReader { - public TemporaryStorageTextReader(int length) + private char* _position; + private readonly char* _end; + + public DirectMemoryAccessStreamReader(char* src, int length) { + Debug.Assert(src != null); + Debug.Assert(length >= 0); + + _position = src; + _end = _position + length; + Length = length; } public int Length { get; } + + public override int Peek() + { + if (_position >= _end) + { + return -1; + } + + return *_position; + } + + public override int Read() + { + if (_position >= _end) + { + return -1; + } + + return *_position++; + } + + public override int Read(char[] buffer, int index, int count) + { + if (buffer == null) + { + throw new ArgumentNullException(nameof(buffer)); + } + + if (index < 0 || index >= buffer.Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + if (count < 0 || (index + count) > buffer.Length) + { + throw new ArgumentOutOfRangeException(nameof(count)); + } + + count = Math.Min(count, (int)(_end - _position)); + if (count > 0) + { + Marshal.Copy((IntPtr)_position, buffer, index, count); + _position += count; + } + + return count; + } } } } diff --git a/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs b/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs index 9a35189227b26..dbee72162bb17 100644 --- a/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs +++ b/src/Workspaces/Core/Desktop/Workspace/Host/TextFactory/DesktopTextFactoryService.cs @@ -22,7 +22,7 @@ internal class DesktopTextFactoryService : ITextFactoryService { cancellationToken.ThrowIfCancellationRequested(); - var temporaryStorageReader = reader as TemporaryStorageServiceFactory.TemporaryStorageTextReader; + var temporaryStorageReader = reader as TemporaryStorageServiceFactory.DirectMemoryAccessStreamReader; if (temporaryStorageReader != null) { return SourceText.From(temporaryStorageReader, temporaryStorageReader.Length, encoding);