Export transcript markers for Adobe Premiere

You want to transcribe media in the cloud and receive transcript markers in Adobe Premiere at the correct start TC

With AVflow.io you can transcribe your audio/video in the cloud, such as by using Rev.com, and then receive an XML file for Adobe Premiere that attaches the transcripts as markers to your clip at the correct start timecode/framerate. Imagine shooting interviews in one part of the country and those interviews/media (with TC-accurate transcripts) being ready for the edit team on the other side of the country when they awake--saving them so much work! Your post production team will love you. :)

Tip: clone this Flow right into your workspace.

Here's how to manually set up the Flow:

Extract metadata, transcribe, and export to Adobe Premiere: here's the flow
  1. AWS S3 should be the trigger to the Flow. (Learn more.)
  2. Next add the "Metadata" step. This step will extract relevant key info from your file such as start timecode and framerate so your markers align to the correct TC. [Learn more about the metadata step here.]



3. Add a transcription step, such as Rev.com (which has both AI and human transcription options). Enter your user and client API keys.

Make sure to enable "Force grouping". This will ensure that words are grouped together per marker, as opposed to one word per marker.

Note: You should check which input formats your transcription API supports (most support mov, mp4, wav, mp3, ogg). If you are importing a video format that they do not support, you should add a transcoding step in advance of sending the media to the API.

4. Add the "Transfer to Storage" step. This will take the output from the transcription step and save it to your storage, such as AWS S3.



5. Next we need to use a Function step, as we need to parse the JSON output from the Rev transcript step. Save the Flow and then create a new Function from the sidebar here:

a. Then copy this code into the Function's main window:

const fs = require('fs');
const path = require('path');
const https = require('https');
const request = require('request');
var AWS = require('aws-sdk');
const S3URLParser = require('amazon-s3-uri');

/**
 * Entry point for the AVflow Function step.
 *
 * Downloads the transcription JSON referenced by `service.videoTranscription`,
 * flattens it into transcript rows, and assembles the media/segments/project
 * payload consumed by the downstream "Export XML" step.
 *
 * @param {object} service - Input parameters wired up in the Flow UI
 *   (videoURL, videoFPS, videoTC, videoWidth, videoHeight,
 *    videoTranscription, videoDuration — all strings, see Input Parameters).
 * @param {object} context - AVflow execution context (unused here).
 * @returns {Promise<{result: object}>} the Export-XML-ready payload.
 */
async function main (service, context) {
  // region/bucket are unused but kept for clarity about what the URL parses to.
  const { region, bucket, key } = S3URLParser(service.videoURL);
  const sourceFile = getMediaFileName(key);

  const resolution = getWidthHeight(service.videoWidth, service.videoHeight);

  const local_transcription_file = './transcription.json';
  await downloadFromS3(service.videoTranscription, local_transcription_file);

  const transcriptRows = await buildTranscriptRows(local_transcription_file);
  const mediaInfo = buildMediaInfo(sourceFile, service.videoDuration, resolution.width, resolution.height, service.videoFPS, service.videoTC);

  const result = {
    content: {
      media: buildMediaBlock(mediaInfo, transcriptRows),
      segments: buildSegmentBlock(mediaInfo, transcriptRows),
      project: buildProjectBlock(`${sourceFile}`, resolution.width, resolution.height, service.videoFPS, service.videoTC)
    }
  };

  console.log(JSON.stringify(result));

  // Return the object directly: the previous JSON.parse(JSON.stringify(result))
  // round-trip was a redundant deep copy of a freshly built, JSON-safe object.
  return {
    result: result
  };
}

// Builds the "project" section of the export payload for the Export XML step.
// creation_date/export_date are fixed placeholder timestamps; the timecode is
// resolved through getMediaTimecode so an unresolved metadata placeholder
// falls back to 00:00:00:00.
function buildProjectBlock(prjName, vidWidth, vidHeight, vidFPS, vidTC) {
  return {
    name: prjName,
    creation_date: "2021-03-18T04:50:23.000Z",
    export_date: "2021-06-03T09:40:07.504Z",
    width: vidWidth,
    height: vidHeight,
    fps: vidFPS,
    timecode: `${getMediaTimecode(vidTC)}`
  };
}

// Wraps the media descriptor and its transcript rows into the single-element
// array shape that the Export XML step expects for the `media` field.
function buildMediaBlock(mediaInfo, transcriptRows) {
  const media = [{ mediaInfo, transcriptRows }];
  console.log(JSON.stringify(media));
  return media;
}

/**
 * Builds the per-clip media descriptor used by buildMediaBlock and
 * buildSegmentBlock.
 *
 * @param {string} file - bare media file name (from getMediaFileName).
 * @param {string} duration - clip duration in seconds (arrives as a string
 *   from the Flow's videoDuration input).
 * @param {string} width - pixel width (first value from getWidthHeight).
 * @param {string} height - pixel height (first value from getWidthHeight).
 * @param {string} FPS - framerate string from the metadata step.
 * @param {string} TC - start timecode (or unresolved placeholder) from the
 *   metadata step.
 * @returns {object} media descriptor.
 */
function buildMediaInfo(file, duration, width, height, FPS, TC) {
  const tcString = getMediaTimecode(TC);

  return {
    id: 20077, // fixed id; must match imported_media_id in buildSegmentBlock
    creation_date: "2021-03-18T04:51:45.000Z", // placeholder timestamp
    name: `${file}`,
    url: `file://${file}`,
    startTimeCode: `${tcString}`,
    // BUG FIX: was hard-coded to 102.129188, silently ignoring the duration
    // that main() passes in from the Flow (service.videoDuration).
    durationInSeconds: Number.parseFloat(duration),
    width: width,
    height: height,
    fps: FPS,
    type: "video"
  };
}

// Reads a Rev-style transcription JSON file and flattens its word list into
// { startTime, endTime, value } rows, stripping characters (slashes,
// parentheses, apostrophes) that would otherwise break the exported markers.
async function buildTranscriptRows(transcription_file) {
  const transcription = await loadTranscriptionFromFile(transcription_file);

  const transcriptRows = transcription.result.words.map((word) => ({
    startTime: parseFloat(word.startTime),
    endTime: parseFloat(word.endTime),
    value: word.value.replace(/[\/\(\)\']/g, "")
  }));

  console.log(transcriptRows);
  return transcriptRows;
}

// Converts transcript rows into timeline segments for the Export XML step.
// Each segment carries a back-reference to the single imported media clip
// (fixed id 20077, mirroring the id in buildMediaInfo).
function buildSegmentBlock(mediaInfo, transcriptRows) {
  return transcriptRows.map((row, index) => ({
    id: index,
    imported_media_id: 20077,
    start: row.startTime,
    end: row.endTime,
    text: row.value,
    imported_media: {
      id: 20077,
      name: mediaInfo.name,
      url: mediaInfo.url,
      original_file_url: null,
      type: "video",
      startTimeCode: mediaInfo.startTimeCode
    }
  }));
}

// Unimplemented stub — presumably intended to derive a media type from a URL
// or extension, but it is never called and always returns undefined.
// TODO(review): implement or remove.
function getMediaType(url) {

}

// Returns the final "/"-separated component of an S3 key or path,
// i.e. the bare media file name.
function getMediaFileName(keyPath) {
  const segments = keyPath.split("/");
  return segments[segments.length - 1];
}

// Maps AVflow's unresolved metadata placeholder to a zero start timecode;
// any concrete timecode string is passed through unchanged.
function getMediaTimecode(tc) {
  // Strict equality (===) instead of ==, and explicit braces; both operands
  // are strings here so behavior is unchanged.
  if (tc === "[[1.media-info.time_code_of_first_frame]]") {
    return '00:00:00:00';
  }
  return tc;
}

// The metadata step can report comma-separated values (one per stream);
// keep only the first width/height pair. Values remain strings.
function getWidthHeight(rawWidth, rawHeight) {
  const [width] = rawWidth.split(',');
  const [height] = rawHeight.split(',');

  return { width, height };
}

// Loads and parses a transcription JSON file from local disk.
// Uses the promise-based fs API so the async signature is honoured instead of
// blocking the event loop with readFileSync.
async function loadTranscriptionFromFile(file_url) {
  const rawdata = await fs.promises.readFile(file_url, 'utf8');
  return JSON.parse(rawdata);
}

/**
 * Downloads srcUrl (an HTTPS/presigned S3 URL) to dstFile on local disk.
 *
 * Fixes over the original:
 *  - request/response/file-stream errors now reject the promise (previously
 *    `reject` was never called, so any network or disk failure hung forever);
 *  - non-2xx responses are rejected instead of silently saving an error body;
 *  - removed the redundant file.end() inside the 'finish' handler — the pipe
 *    already ended the stream, which is what fired 'finish';
 *  - renamed the local `request` variable so it no longer shadows the
 *    module-level require('request').
 *
 * @param {string} srcUrl - HTTPS URL to download.
 * @param {string} dstFile - local path to write to.
 * @returns {Promise<{fileName: string, length: number}>} destination path and
 *   total bytes received.
 */
async function downloadFromS3(srcUrl, dstFile) {
  let downloaded = 0;

  return new Promise((resolve, reject) => {
    const file = fs.createWriteStream(dstFile);

    const req = https.get(srcUrl, (response) => {
      if (response.statusCode < 200 || response.statusCode >= 300) {
        file.destroy();
        reject(new Error(`Download failed with HTTP ${response.statusCode}`));
        return;
      }

      response.on('data', (chunk) => {
        downloaded += chunk.length;
      });
      response.on('error', reject);
      response.pipe(file);

      file.on('finish', () => {
        console.log('Saved to: ' + dstFile);
        console.log('File length: ' + downloaded);
        resolve({
          fileName: dstFile,
          length: downloaded
        });
      });
    });

    req.on('error', reject);
    file.on('error', reject);
  });
}


// AVflow invokes the exported entry point with (service, context).
module.exports = main;

b. Input Parameters:

[
{
dataTypeId: 4,
supportedDataTypeIds: [
'4',
'16',
'14',
'12',
'15',
'1',
'2',
'3',
'6',
'7'
],
required: true,
key: 'videoURL',
dataType: 'string'
},
{
dataTypeId: 4,
supportedDataTypeIds: [
'4',
'16',
'14',
'12',
'15',
'1',
'2',
'3',
'6',
'7'
],
required: true,
key: 'videoFPS',
dataType: 'string'
},
{
dataTypeId: 4,
supportedDataTypeIds: [
'4',
'16',
'14',
'12',
'15',
'1',
'2',
'3',
'6',
'7'
],
require: true,
key: 'videoTC',
dataType: 'string'
},
{
dataTypeId: 4,
supportedDataTypeIds: [
'4',
'16',
'14',
'12',
'15',
'1',
'2',
'3',
'6',
'7'
],
required: true,
key: 'videoWidth',
dataType: 'string'
},
{
dataTypeId: 4,
supportedDataTypeIds: [
'4',
'16',
'14',
'12',
'15',
'1',
'2',
'3',
'6',
'7'
],
required: true,
key: 'videoHeight',
dataType: 'string'
},
{
dataTypeId: 2,
supportedDataTypeIds: [
'4',
'16',
'2'
],
required: false,
key: 'videoTranscription',
dataType: 'transcription'
},
{
dataTypeId: 4,
supportedDataTypeIds: [
'4',
'16',
'14',
'12',
'15',
'1',
'2',
'3',
'6',
'7'
],
required: true,
key: 'videoDuration',
dataType: 'string'
}
]

c. Output parameters:

[
{
dataTypeId: 4,
key: 'result',
dataType: 'string'
}
]

d. Packages:

Copy and paste the below:

[
'amazon-s3-uri'
]

to the packages section (at the bottom) here:

e. Save the Function.

6. Go back to the Flow you were working on, add a Function step into the Flow, and choose the function you just created.

See the screenshot below for how to set up the Input data for the script:


7. Then add the "Export XML" step, which will create an Adobe Premiere-ready XML file. The JSON source will be the output from the Function step you added in step 6.

8. Next: use the "Transfer to Storage" step to save that XML to your AWS S3 bucket.

Great work! Turn the Flow on and you are good to go.

When your Flow has executed, download the XML and open it into Adobe Premiere Pro. Relink media to that source file that you first used to trigger the Flow and now you will see markers on clips and markers on the timeline, giving you options that suit your desired workflow.

TaDa!