-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathPdfSummarizer.cs
More file actions
150 lines (123 loc) · 5.55 KB
/
PdfSummarizer.cs
File metadata and controls
150 lines (123 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
using Microsoft.Azure.Functions.Worker;
using Microsoft.DurableTask;
using Microsoft.DurableTask.Client;
using Microsoft.Extensions.Logging;
using Azure.Storage.Blobs;
using Azure.Identity;
using Azure.AI.FormRecognizer.DocumentAnalysis;
using Newtonsoft.Json;
using System.Net.Http.Headers;
using Microsoft.Azure.Functions.Worker.Extensions.OpenAI;
using Microsoft.Azure.Functions.Worker.Extensions.OpenAI.TextCompletion;
using Azure;
public class DurableFunctionApp
{
private readonly BlobServiceClient _blobServiceClient;
public DurableFunctionApp()
{
var credential = new DefaultAzureCredential();
var endpoint = Environment.GetEnvironmentVariable("AzureWebJobsStorage__blobServiceUri");
_blobServiceClient = new BlobServiceClient(new Uri(endpoint), credential);
}
[Function("BlobTrigger")]
public async Task BlobTrigger(
[BlobTrigger("input/{name}", Connection = "AzureWebJobsStorage")] ReadOnlyMemory<byte> myBlob,
string name,
[DurableClient] DurableTaskClient starter,
FunctionContext context)
{
var logger = context.GetLogger("BlobTrigger");
logger.LogInformation($"C# Blob trigger function Processed blob\n Name:{name} \n Size: {myBlob.Length} Bytes");
await starter.ScheduleNewOrchestrationInstanceAsync("ProcessDocument", name);
}
[Function("ProcessDocument")]
public async Task ProcessDocument(
[OrchestrationTrigger] TaskOrchestrationContext context,
FunctionContext functionContext)
{
var logger = functionContext.GetLogger("ProcessDocument");
string blobName = context.GetInput<string>();
var options = TaskOptions.FromRetryPolicy(new RetryPolicy(
maxNumberOfAttempts: 3,
firstRetryInterval: TimeSpan.FromSeconds(5)));
// Ensure the activity function returns a value
var result = await context.CallActivityAsync<string>("AnalyzePdf", blobName, options);
// Fan out to summarize the content in different languages
var languages = new[] { "Japanese", "Spanish", "French" };
var tasks = new List<Task<string>>();
foreach (var language in languages)
{
tasks.Add(context.CallActivityAsync<string>("SummarizeText", new { Text = result, Language = language }, options));
}
// Fan back in
var summaries = await Task.WhenAll(tasks);
// Write each summary to a separate file and upload to blob
for (int i = 0; i < summaries.Length; i++)
{
await context.CallActivityAsync<WriteDocInput>("WriteDoc", new WriteDocInput { BlobName = blobName, Summary = summaries[i], Language = languages[i] }, options);
}
return;
}
[Function("AnalyzePdf")]
public async Task<string> AnalyzePdf([ActivityTrigger] string blobName, FunctionContext context)
{
var logger = context.GetLogger("AnalyzePdf");
logger.LogInformation("In AnalyzePdf activity");
var containerClient = _blobServiceClient.GetBlobContainerClient("input");
var blobClient = containerClient.GetBlobClient(blobName);
var blob = await blobClient.DownloadContentAsync();
var endpoint = Environment.GetEnvironmentVariable("COGNITIVE_SERVICES_ENDPOINT");
var credential = new DefaultAzureCredential();
var documentAnalysisClient = new DocumentAnalysisClient(new Uri(endpoint), credential);
var modelId = "prebuilt-layout";
AnalyzeDocumentOperation operation = await documentAnalysisClient.AnalyzeDocumentAsync(WaitUntil.Completed, modelId, blob.Value.Content.ToStream());
AnalyzeResult result = operation.Value;
var doc = string.Empty;
foreach (var page in result.Pages)
{
foreach (var line in page.Lines)
{
doc += line.Content;
}
}
return doc;
}
[Function("SummarizeText")]
public async Task<string> SummarizeText(
[ActivityTrigger] dynamic input,
[TextCompletionInput("Can you explain what the following text is about in {Language}? {Text}", Model = "%CHAT_MODEL_DEPLOYMENT_NAME%")] TextCompletionResponse response,
FunctionContext context)
{
var logger = context.GetLogger("SummarizeText");
logger.LogInformation("In SummarizeText activity");
logger.LogInformation(response.Content);
return response.Content.ToString();
}
[Function("WriteDoc")]
public async Task<string> WriteDoc([ActivityTrigger] WriteDocInput results, FunctionContext context)
{
var logger = context.GetLogger("WriteDoc");
logger.LogInformation("In WriteDoc activity");
var containerClient = _blobServiceClient.GetBlobContainerClient("output");
try
{
var name = results.BlobName.Replace(".pdf", string.Empty);
var fileName = $"{name}-summary-{results.Language}.txt";
logger.LogInformation($"Uploading to blob {results.Summary}");
await containerClient.UploadBlobAsync(fileName, new BinaryData(results.Summary));
return null;
}
catch (Exception ex)
{
logger.LogError($"Error uploading to blob: {ex.Message}");
throw;
}
}
}
// Define the WriteDocInput class here
public class WriteDocInput
{
public string BlobName { get; set; }
public string Summary { get; set; }
public string Language { get; set; }
}