-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathWebFileDownloader.cs
122 lines (105 loc) · 5.19 KB
/
WebFileDownloader.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// Copyright (c) The University of Dundee 2018-2019
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.
using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using FAnsi.Discovery;
using Rdmp.Core.Curation;
using Rdmp.Core.Curation.Data;
using Rdmp.Core.DataFlowPipeline;
using Rdmp.Core.DataLoad.Engine.DataProvider;
using Rdmp.Core.DataLoad.Engine.Job;
using Rdmp.Core.ReusableLibraryCode.Checks;
using Rdmp.Core.ReusableLibraryCode.Progress;
using MissingFieldException = System.MissingFieldException;
namespace Rdmp.Core.DataLoad.Modules.Web;
/// <summary>
/// Data load component which downloads a file from a remote URL (e.g. http) into the ForLoading directory of the load.
/// The request is sent with a browser-like User-Agent header and, if the server answers with a Basic
/// <c>realm="Websense"</c> authentication challenge, is retried once using <see cref="WebsenseCredentials"/>.
/// </summary>
public class WebFileDownloader : IPluginDataProvider
{
    /// <summary>
    /// Location of the remote file.  The final segment of its local path is used as the name of the downloaded file.
    /// </summary>
    [DemandsInitialization(
        "The full URI to a file that will be downloaded into project ForLoading directory, must be a valid Uri",
        Mandatory = true)]
    public Uri UriToFile { get; set; }

    /// <summary>
    /// Optional credentials that are only supplied to the server when it issues a Websense challenge.
    /// </summary>
    [DemandsInitialization(
        "Optional Username/password to use for network Websense challenges, these will be provided to the WebRequest as a NetworkCredential")]
    public DataAccessCredentials WebsenseCredentials { get; set; }

    /// <summary>
    /// No initialization is required by this component.
    /// </summary>
    public void Initialize(ILoadDirectory directory, DiscoveredDatabase dbInfo)
    {
    }

    /// <summary>
    /// Downloads <see cref="UriToFile"/> into the ForLoading directory of <paramref name="job"/> and reports the
    /// size of the downloaded file (in units of 1000 bytes) and the elapsed time to the job.
    /// </summary>
    /// <param name="job">The load job whose ForLoading directory receives the file and which receives progress.</param>
    /// <param name="cancellationToken">Not consulted by this implementation.</param>
    /// <returns><see cref="ExitCodeType.Success"/> when the download completes; failures surface as exceptions.</returns>
    public ExitCodeType Fetch(IDataLoadJob job, GracefulCancellationToken cancellationToken)
    {
        var t = Stopwatch.StartNew();

        var destinationFile =
            new FileInfo(Path.Combine(job.LoadDirectory.ForLoading.FullName, Path.GetFileName(UriToFile.LocalPath)));

        DownloadFileWhilstPretendingToBeFirefox(destinationFile, job);

        job.OnProgress(this,
            new ProgressEventArgs(destinationFile.FullName,
                new ProgressMeasurement((int)(destinationFile.Length / 1000), ProgressType.Kilobytes), t.Elapsed));

        return ExitCodeType.Success;
    }

    /// <summary>
    /// Creates (or overwrites) <paramref name="destinationFile"/> and streams the content of
    /// <see cref="UriToFile"/> into it.
    /// </summary>
    private void DownloadFileWhilstPretendingToBeFirefox(FileInfo destinationFile, IDataLoadJob job)
    {
        NetworkCredential credentials = null;

        // Credentials are optional: only attempt to build them when they have been configured.
        if (WebsenseCredentials is not null)
        {
            try
            {
                credentials =
                    new NetworkCredential(WebsenseCredentials.Username, WebsenseCredentials.GetDecryptedPassword());
            }
            catch (Exception)
            {
                // Deliberately best-effort: if the credentials cannot be read or decrypted the download is
                // still attempted without them.
                credentials = null;
            }
        }

        // Dispose the file stream so the content is flushed and the handle released before the caller
        // inspects the file or later load components open it.
        using var output = File.Create(destinationFile.FullName);
        FetchRequest(output, UriToFile.AbsoluteUri, credentials);
    }

    /// <summary>
    /// Issues a GET request for <paramref name="url"/> and copies the response body into <paramref name="output"/>.
    /// When the request fails with a Basic <c>realm="Websense"</c> challenge and credentials have not yet been
    /// used, the request is retried once with <paramref name="credentials"/> applied.
    /// </summary>
    /// <param name="output">Stream that receives the response body; it is not closed by this method.</param>
    /// <param name="url">Absolute URL to request.</param>
    /// <param name="credentials">Credentials to apply when <paramref name="useCredentials"/> is true (may be null).</param>
    /// <param name="useCredentials">True to send <paramref name="credentials"/> with the request.</param>
    /// <exception cref="Exception">The server returned a non-success status that is not a retryable challenge.</exception>
    private static void FetchRequest(Stream output, string url, ICredentials credentials = null,
        bool useCredentials = false)
    {
        using var httpClientHandler = new HttpClientHandler();
        if (useCredentials && credentials is not null)
            httpClientHandler.Credentials = credentials;

        // The handler is disposed by its own using declaration, so the client must not dispose it too.
        using var httpClient = new HttpClient(httpClientHandler, false)
        {
            Timeout = TimeSpan.FromSeconds(60)
        };
        httpClient.DefaultRequestHeaders.Add("User-Agent",
            "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36");

        // The calling data provider method is synchronous, so the asynchronous calls are blocked on here.
        using var response = httpClient.GetAsync(url).Result;
        if (response.IsSuccessStatusCode)
        {
            using var body = response.Content.ReadAsStreamAsync().Result;
            body.CopyTo(output);
            return;
        }

        // Failed - retry with credentials?
        var canRetryWithCredentials = !useCredentials && response.Headers.WwwAuthenticate.Any(static h =>
            h.Scheme.Equals("basic", StringComparison.OrdinalIgnoreCase) &&
            h.Parameter?.Equals("realm=\"Websense\"", StringComparison.OrdinalIgnoreCase) == true);

        if (!canRetryWithCredentials)
            throw new Exception(
                $"Could not get response from {url} - {response.StatusCode} - {response.ReasonPhrase}");

        // Prefer the Location header when the challenge supplies one; otherwise repeat the original request.
        // A relative Location is resolved against the URL that was just requested.
        var location = response.Headers.Location;
        string retryUrl;
        if (location is null)
            retryUrl = url;
        else if (location.IsAbsoluteUri)
            retryUrl = location.AbsoluteUri;
        else
            retryUrl = new Uri(new Uri(url), location).AbsoluteUri;

        FetchRequest(output, retryUrl, credentials, true);
    }

    /// <summary>
    /// Not implemented by this component.
    /// </summary>
    public string GetDescription() => throw new NotImplementedException();

    /// <summary>
    /// Not implemented by this component.
    /// </summary>
    public IDataProvider Clone() => throw new NotImplementedException();

    /// <summary>
    /// Confirms that <see cref="UriToFile"/> has been populated.
    /// </summary>
    /// <returns>Always true when no exception is thrown.</returns>
    /// <exception cref="MissingFieldException"><see cref="UriToFile"/> is null or has a blank path and query.</exception>
    public bool Validate(ILoadDirectory _) =>
        string.IsNullOrWhiteSpace(UriToFile?.PathAndQuery)
            ? throw new MissingFieldException(
                $"{nameof(UriToFile)} is null or white space - should be populated externally as a parameter")
            : true;

    /// <summary>
    /// Nothing is held open between calls, so there is nothing to clean up here.
    /// </summary>
    public void LoadCompletedSoDispose(ExitCodeType exitCode, IDataLoadEventListener postLoadEventListener)
    {
    }

    /// <summary>
    /// Reports a failing check when <see cref="UriToFile"/> has not been set, otherwise reports the configured URI.
    /// </summary>
    public void Check(ICheckNotifier notifier)
    {
        notifier.OnCheckPerformed(UriToFile == null
            ? new CheckEventArgs("No URI has been specified", CheckResult.Fail)
            : new CheckEventArgs($"URI is:{UriToFile}", CheckResult.Success));
    }
}