forked from Azure/cognitive-services
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Program_UploadRss.cs
166 lines (150 loc) · 8.96 KB
/
Program_UploadRss.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
//
// Copyright Microsoft Corporation ("Microsoft").
//
// Microsoft grants you the right to use this software in accordance with your subscription agreement, if any, to use software
// provided for use with Microsoft Azure ("Subscription Agreement"). All software is licensed, not sold.
//
// If you do not have a Subscription Agreement, or at your option if you so choose, Microsoft grants you a nonexclusive, perpetual,
// royalty-free right to use and modify this software solely for your internal business purposes in connection with Microsoft Azure
// and other Microsoft products, including but not limited to, Microsoft R Open, Microsoft R Server, and Microsoft SQL Server.
//
// Unless otherwise stated in your Subscription Agreement, the following applies. THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT
// WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL MICROSOFT OR ITS LICENSORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THE SAMPLE CODE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
using RssGenerator.CosmosDBHelper;
using RssGenerator.RecordFormats;
using RssGenerator.RSS;
using RssGenerator.StorageHelper;
using System;
using System.Collections.Generic;
namespace RssGenerator
{
partial class Program
{
/// <summary>
/// Upload the RSS feeds latest data that are identified in the configuration.
/// </summary>
/// <param name="config">Configuration information including RSS feeds</param>
/// <param name="client">Cosmos DB client to upload to</param>
/// <param name="storageUtility">Azure Storage client to store attachments in</param>
/// <returns>Status string for output</returns>
private static String UploadRssFeeds(Configuration config, CosmosDbClient client, AzureStorageUtility storageUtility)
{
/////////////////////////////////////////////////////////////////////////////////////////////////
// Looop through each of the RSS feeds, collect the articles, then upload them.
/////////////////////////////////////////////////////////////////////////////////////////////////
foreach (RssFeedInfo feed in config.Feeds)
{
Console.WriteLine("Processing feed : " + feed.RSSFeed);
using (RSSFeedReader rssReader = new RSSFeedReader(feed.RSSFeed))
{
/////////////////////////////////////////////////////////////////////////////////////////
// The the batch of articles.....
/////////////////////////////////////////////////////////////////////////////////////////
int feedItemCount = 0;
List<RSSFeedItem> feedItems = rssReader.ReadFeed();
Console.WriteLine("Feed : " + feed.RSSFeed + " has " + feedItems.Count + " items");
/////////////////////////////////////////////////////////////////////////////////////////
// For each article, upload it's image(s) and content.
/////////////////////////////////////////////////////////////////////////////////////////
foreach (RSSFeedItem item in feedItems)
{
Console.WriteLine("Inserting : " + item.Title);
Article mainArticle = new Article();
List<Article> imageContent = new List<Article>();
// Set up the images
foreach (RSSImageItem image in item.Images)
{
String blobUri = storageUtility.UploadBlob(image.Path, feed.AzureStorageContainer).Result;
if (!String.IsNullOrEmpty(blobUri))
{
Article media = new Article();
media.ArtifactType = "image";
media.AssetHash = image.Hash;
media.UniqueIdentifier = image.Id;
media.SetProperty(ArticleProperties.RetrievalDate, DateTime.Now.ToString("O"));
media.SetProperty(ArticleProperties.OriginalUri, image.Uri);
media.SetProperty(ArticleProperties.InternalUri, blobUri);
imageContent.Add(media);
}
}
// Set up the article
mainArticle.SetProperty(ArticleProperties.OriginalUri, item.Uri);
mainArticle.SetProperty(ArticleProperties.RetrievalDate, DateTime.Now.ToString("O"));
mainArticle.SetProperty(ArticleProperties.PostDate, item.PublishedDate);
mainArticle.SetProperty(ArticleProperties.Title, Program.CleanInput(item.Title));
mainArticle.SetProperty(ArticleProperties.Body, Program.CleanInput(item.Summary));
// Create the child entries and attach to teh article.
List<Dictionary<string, string>> childFiles = new List<Dictionary<string, string>>();
foreach (Article file in imageContent)
{
Dictionary<String, String> obj = new Dictionary<string, string>();
obj.Add(Article.MEDIA_ID, file.UniqueIdentifier);
obj.Add(Article.MEDIA_ORIG_URI, file.GetProperty(ArticleProperties.OriginalUri).ToString());
obj.Add(Article.MEDIA_INTERNAL_URI, file.GetProperty(ArticleProperties.InternalUri).ToString());
childFiles.Add(obj);
}
mainArticle.SetProperty(ArticleProperties.ChildImages, childFiles);
mainArticle.SetProperty(ArticleProperties.ChildVideos, null);
mainArticle.SetProperty(ArticleProperties.Author, null);
mainArticle.SetProperty(ArticleProperties.HeroImage, null);
// Insert the media file documents to cosmos first
foreach (Article imageArticle in imageContent)
{
try
{
bool imageResult = client.CreateDocument(config.CosmosDatabase, config.CosmosIngestCollection, imageArticle).Result;
Console.WriteLine("Image Insert: " + imageResult.ToString());
}
catch (Exception ex) { }
}
// Wait briefly....
System.Threading.Thread.Sleep(500);
// Insert the article document to cosmos second
bool articleResult = client.CreateDocument(config.CosmosDatabase, config.CosmosIngestCollection, mainArticle).Result;
Console.WriteLine("Article Insert: " + articleResult.ToString());
// Ease of use to restrict the flow of records, uncomment the break statement.
if (++feedItemCount > 5)
{
//break;
}
}
}
}
return "Finished uploading current articles.";
}
/// <summary>
/// Have to clean up the text that may be translated. HTML tags and double quotes
/// cause the Translation API to fail.This applies to title and body.
/// </summary>
private static String CleanInput(String input)
{
String returnValue = input;
if (!String.IsNullOrEmpty(input))
{
int idxStart = 0;
String tempValue = input;
while ((idxStart = tempValue.IndexOf('<')) != -1)
{
String resultString = String.Empty;
int idxEnd = tempValue.IndexOf('>');
if (idxStart > 0)
{
resultString = tempValue.Substring(0, idxStart);
}
resultString += tempValue.Substring(idxEnd + 1);
tempValue = resultString;
}
returnValue = tempValue;
}
// Finally strip out any double quotes...causes an exception in the translation API
return returnValue.Replace('\"', '\'');
}
}
}