From 7efffa3d71d6652aab9806eedb0df19c462cc28f Mon Sep 17 00:00:00 2001
From: cmendible <266546+cmendible@users.noreply.github.com>
Date: Wed, 17 Jan 2024 22:14:49 +0100
Subject: [PATCH] added aihub

---
 .github/workflows/main.yaml                   |  71 ++
 infra/.terraform.lock.hcl                     | 134 ++--
 infra/README.md                               |  37 +-
 infra/main.tf                                 | 144 ++--
 infra/modules/apim/main.tf                    |  14 +-
 .../{ca-back => ca-aihub}/auth_config.tf      |   3 +-
 infra/modules/ca-aihub/main.tf                | 208 ++++++
 .../modules/{ca-back => ca-aihub}/outputs.tf  |   0
 .../{ca-webapi => ca-aihub}/providers.tf      |   2 +-
 .../{ca-webapi => ca-aihub}/variables.tf      |  18 +-
 infra/modules/ca-chat/auth_config.tf          |  67 ++
 infra/modules/{ca-back => ca-chat}/main.tf    |   2 +-
 infra/modules/ca-chat/outputs.tf              |   4 +
 .../{ca-webapp => ca-chat}/providers.tf       |   2 +-
 .../modules/{ca-back => ca-chat}/variables.tf |   2 +-
 infra/modules/ca-prep-docs/main.tf            | 120 ++++
 .../{ca-back => ca-prep-docs}/providers.tf    |   2 +-
 infra/modules/ca-prep-docs/variables.tf       |  13 +
 infra/modules/ca-webapi/main.tf               | 102 ---
 infra/modules/ca-webapi/outputs.tf            |   4 -
 infra/modules/ca-webapp/main.tf               |  59 --
 infra/modules/ca-webapp/outputs.tf            |   3 -
 infra/modules/ca-webapp/variables.tf          |   9 -
 infra/modules/cae/providers.tf                |   2 +-
 infra/modules/cog/main.tf                     |  56 ++
 infra/modules/cog/outputs.tf                  |  19 +
 infra/modules/cog/providers.tf                |  20 +
 infra/modules/cog/variables.tf                |   8 +
 infra/modules/nsg/nsg_apim.tf                 |  35 +-
 infra/modules/openai/main.tf                  |  57 +-
 infra/modules/openai/variables.tf             |   1 -
 infra/modules/search/main.tf                  |   3 +-
 infra/modules/sp/main.tf                      |   5 +-
 infra/modules/sp/outputs.tf                   |   4 +
 infra/modules/st/main.tf                      |  35 +-
 infra/modules/st/outputs.tf                   |  10 +-
 infra/providers.tf                            |   2 +-
 infra/variables.tf                            |  55 +-
 scripts/Dockerfile                            |  11 +
 scripts/prepdocs.py                           | 638 ++++++++++++++++++
 scripts/prepdocs.sh                           |   4 +
 scripts/requirements.txt                      |   8 +
 42 files changed, 1579 insertions(+), 414 deletions(-)
 create mode 100644 .github/workflows/main.yaml
 rename infra/modules/{ca-back => ca-aihub}/auth_config.tf (93%)
 create mode 100644 infra/modules/ca-aihub/main.tf
 rename infra/modules/{ca-back => ca-aihub}/outputs.tf (100%)
 rename infra/modules/{ca-webapi => ca-aihub}/providers.tf (92%)
 rename infra/modules/{ca-webapi => ca-aihub}/variables.tf (55%)
 create mode 100644 infra/modules/ca-chat/auth_config.tf
 rename infra/modules/{ca-back => ca-chat}/main.tf (98%)
 create mode 100644 infra/modules/ca-chat/outputs.tf
 rename infra/modules/{ca-webapp => ca-chat}/providers.tf (92%)
 rename infra/modules/{ca-back => ca-chat}/variables.tf (93%)
 create mode 100644 infra/modules/ca-prep-docs/main.tf
 rename infra/modules/{ca-back => ca-prep-docs}/providers.tf (92%)
 create mode 100644 infra/modules/ca-prep-docs/variables.tf
 delete mode 100644 infra/modules/ca-webapi/main.tf
 delete mode 100644 infra/modules/ca-webapi/outputs.tf
 delete mode 100644 infra/modules/ca-webapp/main.tf
 delete mode 100644 infra/modules/ca-webapp/outputs.tf
 delete mode 100644 infra/modules/ca-webapp/variables.tf
 create mode 100644 infra/modules/cog/main.tf
 create mode 100644 infra/modules/cog/outputs.tf
 create mode 100644 infra/modules/cog/providers.tf
 create mode 100644 infra/modules/cog/variables.tf
 create mode 100644 scripts/Dockerfile
 create mode 100644 scripts/prepdocs.py
 create mode 100644 scripts/prepdocs.sh
 create mode 100644 scripts/requirements.txt

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
new file mode 100644
index 0000000..8988509
--- /dev/null
+++ b/.github/workflows/main.yaml
@@ -0,0 +1,71 @@
+name: build  # builds the prepdocs image from ./scripts and publishes it to ghcr.io
+
+on:
+  push:
+    branches:
+      - main
+      - aihub
+    tags:
+      - 'v*'
+  pull_request:  # pull requests build the image but never push it (see the push input below)
+    branches:
+      - main
+  workflow_dispatch:
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: aihub-prepdocs
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write  # lets GITHUB_TOKEN push the image to ghcr.io
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # MinVer needs the full history and tags; a shallow clone yields the default version
+
+      - name: Set up .NET Core
+        uses: actions/setup-dotnet@v4
+        with:
+          dotnet-version: '8.x'
+
+      - name: Setup MinVer
+        run: |
+          dotnet tool install --global minver-cli --version 4.3.0
+
+      - name: Calculate Version  # exported as MINVERVERSIONOVERRIDE and used as the image tag
+        run: |
+          echo "MINVERVERSIONOVERRIDE=$($HOME/.dotnet/tools/minver -t v. -m 1.0 -d preview)" >> $GITHUB_ENV
+
+      - name: Login to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Extract metadata (tags, labels) for Docker  # only the labels output is consumed below
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Lower case REPO  # the image path below uses the lower-cased owner/repo
+        run: |
+          echo "GITHUB_REPOSITORY_LOWER_CASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: ./scripts/
+          file: ./scripts/Dockerfile
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY_LOWER_CASE }}/${{ env.IMAGE_NAME }}:${{ env.MINVERVERSIONOVERRIDE }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/infra/.terraform.lock.hcl b/infra/.terraform.lock.hcl
index 3efc788..a4456cd 100644
--- a/infra/.terraform.lock.hcl
+++ b/infra/.terraform.lock.hcl
@@ -2,97 +2,97 @@
 # Manual edits may be lost in future updates.
 
 provider "registry.terraform.io/azure/azapi" {
-  version = "1.9.0"
+  version = "1.11.0"
   hashes = [
-    "h1:yIJQVdnmGZdvS3yrw0M8ke9KiB/c0tjZ7KUXC46Hjx0=",
-    "zh:349569471fbf387feaaf8b88da1690669e201147c342f905e5eb03df42b3cf87",
-    "zh:54346d5fb78cbad3eb7cfd96e1dd7ce4f78666cabaaccfec6ee9437476330018",
-    "zh:64b799da915ea3a9a58ac7a926c6a31c59fd0d911687804d8e815eda88c5580b",
-    "zh:9336ed9e112555e0fda8af6be9ba21478e30117d79ba662233311d9560d2b7c6",
-    "zh:a8aace9897b28ea0b2dbd7a3be3df033e158af40412c9c7670be0956f216ed7e",
-    "zh:ab23df7de700d9e785009a4ca9ceb38ae1ab894a13f5788847f15d018556f415",
-    "zh:b4f13f0b13560a67d427c71c85246f8920f98987120341830071df4535842053",
-    "zh:e58377bf36d8a14d28178a002657865ee17446182dac03525fd43435e41a1b5c",
-    "zh:ea5db4acc6413fd0fe6b35981e58cdc9850f5f3118031cc3d2581de511aee6aa",
-    "zh:f0b32c06c6bd4e4af2c02a62be07b947766aeeb09289a03f21aba16c2fd3c60f",
-    "zh:f1518e766a90c257d7eb36d360dafaf311593a4a9352ff8db0bcfe0ed8cf45ae",
-    "zh:fa89e84cff0776b5b61ff27049b1d8ed52040bd58c81c4628890d644a6fb2989",
+    "h1:nxSbPf052jbk91vEmlJ6JxV7AhJzyxRclLQAiDXORek=",
+    "zh:240ba0f3d87f8faf3171e1dd0ec74bffc868bde84db7fb2c89913c787b11ef07",
+    "zh:422cfbe039f6041525d55aa0641dfed014d970b516d8de058a1869736682b9d3",
+    "zh:4be67c64d73eb3c31706d575436179cc6f6b3dece00709e5721b60512031b2f2",
+    "zh:744b4f68b229c11b3df1198e4ebb4646fa44c14ac5f271337da03917d9fad433",
+    "zh:86927d43f75a8163c2c947fae8d48a63219865e50df437372ee66378826172a1",
+    "zh:a44523fad3a806b2ccee2e81ef206ddaab365eacdec213ec2cce2ddd7d4ed731",
+    "zh:b15c9edac6df2c250ff04f0edae18a9656d19c79c475ef68be5f5c2631059d7e",
+    "zh:d1365f7fe280c11cc7613b4b47798c1f96271c4bb2eed951d6a994790d0b62d4",
+    "zh:e7fea9c180f1f2be6e96152a3b4e0beada3aa585c186f1f3de6be6c74ee858fe",
+    "zh:e8278579b6a18e04a538a1163e257a9be65a3cc35e13a57ea868f179ca03ec28",
+    "zh:fc8f4eeefb44877965eae59e152d82e347261936d47d4d3c448cde5181164ae3",
+    "zh:fd76a6fb2819a1ce56454132c775b721c5028e4f24dd264f2897f345cd4b12ee",
   ]
 }
 
 provider "registry.terraform.io/hashicorp/azuread" {
-  version = "2.44.0"
+  version = "2.47.0"
   hashes = [
-    "h1:oAmmXpj7Pb89vRw0ThEpaU6Aio8TmdQHKfrCFWykRDY=",
-    "zh:06d0061a5ba488a4bdd661117f410f0bce4b103153ddba6854c06a68cf50542b",
-    "zh:16ec95329d12de191eca3f75e066e9d70730fd7ff7b175fe1c61f247b9aba9e0",
+    "h1:zYMGokLn44KSWir7Nr4t8lEAPMB6JuXd2LlP2Ac2tMY=",
+    "zh:1372d81eb24ef3b4b00ea350fe87219f22da51691b8e42ce91d662f6c2a8af5e",
     "zh:1c3e89cf19118fc07d7b04257251fc9897e722c16e0a0df7b07fcd261f8c12e7",
-    "zh:82530c85481617fd2d7e2ad7c6a2318da19dd8a04566c3e39c35d69d9270f4c5",
-    "zh:9879fddc7740e9a68066d820f49e3941b6514cdf72b86162d033a2f9dd490340",
-    "zh:a152d237396cffd701ae184146f3ff9076f617aa72177e11ad76b828ab9518ca",
-    "zh:aef2da192bf30856f317525b2a7a02da277089a562ad5a74edf8fc059bd2ab58",
-    "zh:b142f05ad261afc06bf74a399294df1330c556a69ef9605fc64ae2aa8eebe8e8",
-    "zh:b177d35dec6dfd2b962e6f4239229645c30d1625c3cafde539a2beab312d6914",
-    "zh:ce2c8571e0e0bb350abb312fbfc90795663fd64d43c078477cc75031e3078c87",
-    "zh:e0b1577910276d7cbb11c6002a27f7918a026fd5b5c4170c28a797fe3167a3db",
-    "zh:f42583d31cd3a77d02fafbf8d744a37c4497ac05554f264b30908b621449731d",
+    "zh:1e654a74d171d6ff8f9f6f67e3ff1421d4c5e56a18607703626bf12cd23ba001",
+    "zh:35227fad617a0509c64ab5759a8b703b10d244877f1aa5416bfbcc100c96996f",
+    "zh:357f553f0d78d46a96c7b2ed06d25ee0fc60fc5be19812ccb5d969fa47d62e17",
+    "zh:58faa2940065137e3e87d02eba59ab5cd7137d7a18caf225e660d1788f274569",
+    "zh:7308eda0339620fa24f47cedd22221fc2c02cab9d5be1710c09a783aea84eb3a",
+    "zh:863eabf7f908a8263e28d8aa2ad1381affd6bb5c67755216781f674ef214100e",
+    "zh:8b95b595a7c14ed7b56194d03cdec253527e7a146c1c58961be09e6b5c50baee",
+    "zh:afbca6b4fac9a0a488bc22ff9e51a8f14e986137d25275068fd932f379a51d57",
+    "zh:c6aadec4c81a44c3ffc22c2d90ffc6706bf5a9a903a395d896477516f4be6cbb",
+    "zh:e54a59de7d4ef0f3a18f91fed0b54a2bce18257ae2ee1df8a88226e1023c5811",
   ]
 }
 
 provider "registry.terraform.io/hashicorp/azurerm" {
-  version     = "3.72.0"
-  constraints = "3.72.0"
+  version     = "3.87.0"
+  constraints = "3.87.0"
   hashes = [
-    "h1:xogMjHMWY7pwV6ZB8CSmZi0YJ8PLQrE+maLnQQv5x5g=",
-    "zh:0750326f82dc0765cd9dc0e142b4c325be7918beeacc0b887510274f15d76311",
-    "zh:10ba452905de646181bfbbb9555c7b8fb96138ddc4bb42227521c402c3b12213",
-    "zh:25c8198603cffa0920e6ae39a87a5bb4af75bbe1fba36156e8077ae50261a7ca",
-    "zh:5c294fff683c2fc292f502da43f41bf4b68a20bf60a2e92723768a0ce7fe2c7a",
-    "zh:84449a0e7d5bd4a3fda9a4c9ad287c4c7ebcc5ede406d3ab7593f073d40abdfc",
-    "zh:89f3fc2b3e84e45776fce547ed9fa3dbdba65fe243094fe308c5cef273b4d980",
-    "zh:a8cdfc816fbf14a230c3bb4ccdf70d19069186de78008e49dc9dfaa8aaf0208e",
-    "zh:d6e1d86f2d6d0e09d3961f10f9e26e24a25d39e98ecaf93d5cb089ddb4fea5b6",
-    "zh:e74f0e6c3904da8ff10bdb90be1fd8b20f1d3f14d62d24ffb76b61c623ee0e3c",
-    "zh:e9fb32ef48450b8109e30e47280053ca7d5307190bf6f516e1bebaf556dc8d81",
-    "zh:ee8c9bb7aa318a3d8a313eb032b3fc1a332114fe112723ba7b0c8cb4a5947476",
+    "h1:SqFtup3wvbozutkXVF78LylpKL4nGED1cbk2IbLzhAQ=",
+    "zh:1547ed020fa6ca25d940b28601442c7e4495fdea9fb1ead7affb867383f5f40b",
+    "zh:325e6d636b5ab09a24837194647617c9fabd42f0fb2c7e18ae8d2a8b2d890a55",
+    "zh:3abb0074de1dc3b723f8c209354ba93e717ba52003847484b19369e8f54735f4",
+    "zh:52d2b1700108d5093113a986aa10757834e48083c1358a2c7d8d0360e2689390",
+    "zh:5fe377d5cc80e26766ff411dbcb227728709fe34b14ad106c9e374df653086a4",
+    "zh:747fe80de4fb88b17cac93ff05d62909d3563325c8ed5a461641b48579c328f8",
+    "zh:b40142e4041b7f000ab2dda58309755395a4018d5d00218f6a601f737389865a",
+    "zh:bca622818c221cec81d636879e376c15696a8d091703298195d728b3c1eae7db",
+    "zh:bfaecd203137ff9eb3228b1cbd3191e1d84d7c019855eb5f3071bbf6eb060a51",
+    "zh:d197f04b54f2be07f827ced220954d723039c84793a4ce91894b622982c25811",
+    "zh:e831601ea1f67c5e745946ed3ac0cac772ed8e95ca7d7314d3f0ed631e6eefb1",
     "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
   ]
 }
 
 provider "registry.terraform.io/hashicorp/null" {
-  version = "3.2.1"
+  version = "3.2.2"
   hashes = [
-    "h1:vUW21lLLsKlxtBf0QF7LKJreKxs0CM7YXGzqW1N/ODY=",
-    "zh:58ed64389620cc7b82f01332e27723856422820cfd302e304b5f6c3436fb9840",
-    "zh:62a5cc82c3b2ddef7ef3a6f2fedb7b9b3deff4ab7b414938b08e51d6e8be87cb",
-    "zh:63cff4de03af983175a7e37e52d4bd89d990be256b16b5c7f919aff5ad485aa5",
-    "zh:74cb22c6700e48486b7cabefa10b33b801dfcab56f1a6ac9b6624531f3d36ea3",
+    "h1:m467k2tZ9cdFFgHW7LPBK2GLPH43LC6wc3ppxr8yvoE=",
+    "zh:3248aae6a2198f3ec8394218d05bd5e42be59f43a3a7c0b71c66ec0df08b69e7",
+    "zh:32b1aaa1c3013d33c245493f4a65465eab9436b454d250102729321a44c8ab9a",
+    "zh:38eff7e470acb48f66380a73a5c7cdd76cc9b9c9ba9a7249c7991488abe22fe3",
+    "zh:4c2f1faee67af104f5f9e711c4574ff4d298afaa8a420680b0cb55d7bbc65606",
+    "zh:544b33b757c0b954dbb87db83a5ad921edd61f02f1dc86c6186a5ea86465b546",
+    "zh:696cf785090e1e8cf1587499516b0494f47413b43cb99877ad97f5d0de3dc539",
+    "zh:6e301f34757b5d265ae44467d95306d61bef5e41930be1365f5a8dcf80f59452",
     "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
-    "zh:79e553aff77f1cfa9012a2218b8238dd672ea5e1b2924775ac9ac24d2a75c238",
-    "zh:a1e06ddda0b5ac48f7e7c7d59e1ab5a4073bbcf876c73c0299e4610ed53859dc",
-    "zh:c37a97090f1a82222925d45d84483b2aa702ef7ab66532af6cbcfb567818b970",
-    "zh:e4453fbebf90c53ca3323a92e7ca0f9961427d2f0ce0d2b65523cc04d5d999c2",
-    "zh:e80a746921946d8b6761e77305b752ad188da60688cfd2059322875d363be5f5",
-    "zh:fbdb892d9822ed0e4cb60f2fedbdbb556e4da0d88d3b942ae963ed6ff091e48f",
-    "zh:fca01a623d90d0cad0843102f9b8b9fe0d3ff8244593bd817f126582b52dd694",
+    "zh:913a929070c819e59e94bb37a2a253c228f83921136ff4a7aa1a178c7cce5422",
+    "zh:aa9015926cd152425dbf86d1abdbc74bfe0e1ba3d26b3db35051d7b9ca9f72ae",
+    "zh:bb04798b016e1e1d49bcc76d62c53b56c88c63d6f2dfe38821afef17c416a0e1",
+    "zh:c23084e1b23577de22603cff752e59128d83cfecc2e6819edadd8cf7a10af11e",
   ]
 }
 
 provider "registry.terraform.io/hashicorp/random" {
-  version = "3.5.1"
+  version = "3.6.0"
   hashes = [
-    "h1:3hjTP5tQBspPcFAJlfafnWrNrKnr7J4Cp0qB9jbqf30=",
-    "zh:04e3fbd610cb52c1017d282531364b9c53ef72b6bc533acb2a90671957324a64",
-    "zh:119197103301ebaf7efb91df8f0b6e0dd31e6ff943d231af35ee1831c599188d",
-    "zh:4d2b219d09abf3b1bb4df93d399ed156cadd61f44ad3baf5cf2954df2fba0831",
-    "zh:6130bdde527587bbe2dcaa7150363e96dbc5250ea20154176d82bc69df5d4ce3",
-    "zh:6cc326cd4000f724d3086ee05587e7710f032f94fc9af35e96a386a1c6f2214f",
+    "h1:t0mRdJzegohRKhfdoQEJnv3JRISSezJRblN0HIe67vo=",
+    "zh:03360ed3ecd31e8c5dac9c95fe0858be50f3e9a0d0c654b5e504109c2159287d",
+    "zh:1c67ac51254ba2a2bb53a25e8ae7e4d076103483f55f39b426ec55e47d1fe211",
+    "zh:24a17bba7f6d679538ff51b3a2f378cedadede97af8a1db7dad4fd8d6d50f829",
+    "zh:30ffb297ffd1633175d6545d37c2217e2cef9545a6e03946e514c59c0859b77d",
+    "zh:454ce4b3dbc73e6775f2f6605d45cee6e16c3872a2e66a2c97993d6e5cbd7055",
     "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
-    "zh:b6d88e1d28cf2dfa24e9fdcc3efc77adcdc1c3c3b5c7ce503a423efbdd6de57b",
-    "zh:ba74c592622ecbcef9dc2a4d81ed321c4e44cddf7da799faa324da9bf52a22b2",
-    "zh:c7c5cde98fe4ef1143bd1b3ec5dc04baf0d4cc3ca2c5c7d40d17c0e9b2076865",
-    "zh:dac4bad52c940cd0dfc27893507c1e92393846b024c5a9db159a93c534a3da03",
-    "zh:de8febe2a2acd9ac454b844a4106ed295ae9520ef54dc8ed2faf29f12716b602",
-    "zh:eab0d0495e7e711cca367f7d4df6e322e6c562fc52151ec931176115b83ed014",
+    "zh:91df0a9fab329aff2ff4cf26797592eb7a3a90b4a0c04d64ce186654e0cc6e17",
+    "zh:aa57384b85622a9f7bfb5d4512ca88e61f22a9cea9f30febaa4c98c68ff0dc21",
+    "zh:c4a3e329ba786ffb6f2b694e1fd41d413a7010f3a53c20b432325a94fa71e839",
+    "zh:e2699bc9116447f96c53d55f2a00570f982e6f9935038c3810603572693712d0",
+    "zh:e747c0fd5d7684e5bfad8aa0ca441903f15ae7a98a737ff6aca24ba223207e2c",
+    "zh:f1ca75f417ce490368f047b63ec09fd003711ae48487fba90b4aba2ccf71920e",
   ]
 }
diff --git a/infra/README.md b/infra/README.md
index 366638a..8b2d9f9 100644
--- a/infra/README.md
+++ b/infra/README.md
@@ -12,39 +12,4 @@ terraform apply
 
 ## Manual steps
 
-> This is temporal
-
-Clone the GitHub repository [cmendible/azure-search-openai-demo](https://github.com/cmendible/azure-search-openai-demo) and run the following commands to deploy the Azure Search Index and upload the sample documents:
-
-```bash
-git clone https://github.com/cmendible/azure-search-openai-demo.git
-cd azure-search-openai-demo
-git checkout k8s
-
-export AZURE_PRINCIPAL_ID="<principal id>"
-export AZURE_RESOURCE_GROUP="<resource group>" 
-export AZURE_SUBSCRIPTION_ID="<subscription id>"
-export AZURE_TENANT_ID="<azure tenant id>"
-export AZURE_STORAGE_ACCOUNT="<storage account name>"
-export AZURE_STORAGE_CONTAINER="content"
-export AZURE_SEARCH_SERVICE="<search service name>"
-export OPENAI_HOST="azure"
-export AZURE_OPENAI_SERVICE="<openai service name>"
-export OPENAI_API_KEY=""
-export OPENAI_ORGANIZATION=""
-export AZURE_OPENAI_EMB_DEPLOYMENT="text-embedding-ada-002"
-export AZURE_OPENAI_EMB_MODEL_NAME="text-embedding-ada-002"
-export AZURE_SEARCH_INDEX="gptkbindex"
-```
-
-Login to Azure:
-
-```bash
-azd auth login --client-id <client-id> --client-secret <client-password> --tenant-id <tenant-id>
-```
-
-Deploy the Azure Search Index and upload the sample documents:
-
-```bash
-./scripts/prepdocs.sh
-```
\ No newline at end of file
+TODO: Describe these steps
diff --git a/infra/main.tf b/infra/main.tf
index d40e71a..3d26b1e 100644
--- a/infra/main.tf
+++ b/infra/main.tf
@@ -4,14 +4,30 @@ resource "random_id" "random" {
   byte_length = 8
 }
 
-resource "azurerm_resource_group" "rg" {
-  name     = var.resource_group_name
-  location = var.location
+locals {
+  sufix                   = var.use_random_suffix ? substr(lower(random_id.random.hex), 1, 4) : "" # 4 hex chars of random_id.random starting at offset 1, or "" when var.use_random_suffix is false
+  name_sufix              = var.use_random_suffix ? "-${local.sufix}" : "" # dash-prefixed form appended to every name below except the storage account
+  resource_group_name     = "${var.resource_group_name}${local.name_sufix}"
+  storage_account_name    = "${var.storage_account_name}${local.sufix}" # bare suffix, no dash; presumably because storage account names cannot contain "-" (confirm)
+  azopenai_name           = "${var.azopenai_name}${local.name_sufix}"
+  content_safety_name     = "${var.content_safety_name}${local.name_sufix}"
+  cognitive_services_name = "${var.cognitive_services_name}${local.name_sufix}"
+  speech_name             = "${var.speech_name}${local.name_sufix}"
+  bing_name               = "${var.bing_name}${local.name_sufix}"
+  search_name             = "${var.search_name}${local.name_sufix}"
+  form_recognizer_name    = "${var.form_recognizer_name}${local.name_sufix}"
+  apim_name               = "${var.apim_name}${local.name_sufix}"
+  appi_name               = "${var.appi_name}${local.name_sufix}"
+  log_name                = "${var.log_name}${local.name_sufix}"
+  cae_name                = "${var.cae_name}${local.name_sufix}"
+  ca_chat_name            = "${var.ca_chat_name}${local.name_sufix}"
+  ca_prep_docs_name       = "${var.ca_prep_docs_name}${local.name_sufix}"
+  ca_aihub_name           = "${var.ca_aihub_name}${local.name_sufix}" # NOTE(review): "sufix" is a misspelling of "suffix"; rename only together with every local.* reference
+}
 
-locals {
-  name_sufix           = substr(lower(random_id.random.hex), 1, 4)
-  storage_account_name = "${var.storage_account_name}${local.name_sufix}"
+resource "azurerm_resource_group" "rg" {
+  name     = local.resource_group_name
+  location = var.location
 }
 
 module "vnet" {
@@ -37,7 +53,7 @@ module "apim" {
   source                   = "./modules/apim"
   location                 = azurerm_resource_group.rg.location
   resource_group_name      = azurerm_resource_group.rg.name
-  apim_name                = var.apim_name
+  apim_name                = local.apim_name
   apim_subnet_id           = module.vnet.apim_subnet_id
   publisher_name           = var.publisher_name
   publisher_email          = var.publisher_email
@@ -68,7 +84,7 @@ module "search" {
   source              = "./modules/search"
   location            = azurerm_resource_group.rg.location
   resource_group_name = azurerm_resource_group.rg.name
-  search_name         = var.search_name
+  search_name         = local.search_name
   principal_id        = module.mi.principal_id
 }
 
@@ -76,21 +92,21 @@ module "form_recognizer" {
   source               = "./modules/form"
   location             = azurerm_resource_group.rg.location
   resource_group_name  = azurerm_resource_group.rg.name
-  form_recognizer_name = var.form_recognizer_name
+  form_recognizer_name = local.form_recognizer_name
 }
 
 module "log" {
   source              = "./modules/log"
   location            = azurerm_resource_group.rg.location
   resource_group_name = azurerm_resource_group.rg.name
-  log_name            = var.log_name
+  log_name            = local.log_name
 }
 
 module "appi" {
   source              = "./modules/appi"
   location            = azurerm_resource_group.rg.location
   resource_group_name = azurerm_resource_group.rg.name
-  appi_name           = var.appi_name
+  appi_name           = local.appi_name
   log_id              = module.log.log_id
 }
 
@@ -106,27 +122,38 @@ module "openai" {
   source              = "./modules/openai"
   location            = azurerm_resource_group.rg.location
   resource_group_name = azurerm_resource_group.rg.name
-  secondary_location  = var.secondary_location
-  azopenai_name       = var.azopenai_name
+  azopenai_name       = local.azopenai_name
   principal_id        = module.mi.principal_id
 }
 
+module "cog" {
+  source                  = "./modules/cog" # receives the Bing, Cognitive Services, Content Safety and Speech resource names
+  location                = azurerm_resource_group.rg.location
+  resource_group_name     = azurerm_resource_group.rg.name
+  resource_group_id       = azurerm_resource_group.rg.id
+  principal_id            = module.mi.principal_id # same principal that is passed to the search and openai modules
+  bing_name               = local.bing_name
+  cognitive_services_name = local.cognitive_services_name
+  content_safety_name     = local.content_safety_name
+  speech_name             = local.speech_name
+}
+
 module "cae" {
   source            = "./modules/cae"
   location          = azurerm_resource_group.rg.location
   resource_group_id = azurerm_resource_group.rg.id
-  cae_name          = var.cae_name
+  cae_name          = local.cae_name
   cae_subnet_id     = module.vnet.cae_subnet_id
   log_workspace_id  = module.log.log_workspace_id
   log_key           = module.log.log_key
   appi_key          = module.appi.appi_key
 }
 
-module "ca_back" {
-  source                         = "./modules/ca-back"
+module "ca_chat" {
+  source                         = "./modules/ca-chat"
   location                       = azurerm_resource_group.rg.location
   resource_group_id              = azurerm_resource_group.rg.id
-  ca_name                        = var.ca_back_name
+  ca_name                        = local.ca_chat_name
   cae_id                         = module.cae.cae_id
   managed_identity_id            = module.mi.mi_id
   chat_gpt_deployment            = module.openai.gpt_deployment_name
@@ -137,42 +164,57 @@ module "ca_back" {
   storage_container_name         = module.st.storage_container_name
   search_service_name            = module.search.search_service_name
   search_index_name              = module.search.search_index_name
-  openai_service_name            = var.enable_apim ? module.apim.gateway_url : module.openai.openai_endpoint
+  openai_endpoint                = var.enable_apim ? module.apim.gateway_url : module.openai.openai_endpoint
   tenant_id                      = data.azurerm_subscription.current.tenant_id
   managed_identity_client_id     = module.mi.client_id
   enable_entra_id_authentication = var.enable_entra_id_authentication
 }
 
-# module "ca_webapi" {
-#   source                     = "./modules/ca-webapi"
-#   location                   = azurerm_resource_group.rg.location
-#   resource_group_id          = azurerm_resource_group.rg.id
-#   ca_name                    = var.ca_webapi_name
-#   cae_id                     = module.cae.cae_id
-#   cae_default_domain         = module.cae.defaultDomain
-#   ca_webapp_name             = var.ca_webapp_name
-#   managed_identity_id        = module.mi.mi_id
-#   chat_gpt_deployment        = module.openai.gpt_deployment_name
-#   chat_gpt_model             = module.openai.gpt_deployment_name
-#   embeddings_deployment      = module.openai.embedding_deployment_name
-#   embeddings_model           = module.openai.embedding_deployment_name
-#   storage_account_name       = module.st.storage_account_name
-#   storage_container_name     = module.st.storage_container_name
-#   search_service_name        = module.search.search_service_name
-#   search_index_name          = module.search.search_index_name
-#   openai_service_name        = module.openai.openai_service_name
-#   tenant_id                  = data.azurerm_subscription.current.tenant_id
-#   managed_identity_client_id = module.mi.client_id
-# }
-
-# module "ca_webapp" {
-#   source                     = "./modules/ca-webapp"
-#   location                   = azurerm_resource_group.rg.location
-#   resource_group_id          = azurerm_resource_group.rg.id
-#   ca_name                    = var.ca_webapp_name
-#   cae_id                     = module.cae.cae_id
-#   managed_identity_id        = module.mi.mi_id
-#   tenant_id                  = data.azurerm_subscription.current.tenant_id
-#   managed_identity_client_id = module.mi.client_id
-#   backend_url                = module.ca_webapi.fqdn
-# }
+module "ca_prep_docs" {
+  source                     = "./modules/ca-prep-docs" # document-preparation module, placed in the environment given by cae_id
+  location                   = azurerm_resource_group.rg.location
+  resource_group_id          = azurerm_resource_group.rg.id
+  ca_name                    = local.ca_prep_docs_name
+  cae_id                     = module.cae.cae_id
+  managed_identity_id        = module.mi.mi_id
+  storage_account_name       = module.st.storage_account_name
+  storage_account_key        = module.st.key # key exported by the storage module (module.st)
+  search_service_name        = module.search.search_service_name
+  tenant_id                  = data.azurerm_subscription.current.tenant_id
+  managed_identity_client_id = module.mi.client_id
+  openai_service_name        = module.openai.openai_service_name # service name, whereas ca_chat and ca_aihub receive an endpoint
+  resource_group_name        = azurerm_resource_group.rg.name
+  subscription_id            = data.azurerm_subscription.current.subscription_id
+}
+
+module "ca_aihub" {
+  source                     = "./modules/ca-aihub" # AI Hub module, wired to the chat app, OpenAI, search, storage and the module.cog outputs
+  location                   = azurerm_resource_group.rg.location
+  resource_group_id          = azurerm_resource_group.rg.id
+  ca_name                    = local.ca_aihub_name
+  cae_id                     = module.cae.cae_id
+  managed_identity_id        = module.mi.mi_id
+  chat_gpt_deployment        = module.openai.gpt_deployment_name
+  chat_gpt_model             = module.openai.gpt_deployment_name # NOTE(review): model receives the deployment name, same value as chat_gpt_deployment
+  embeddings_deployment      = module.openai.embedding_deployment_name
+  embeddings_model           = module.openai.embedding_deployment_name # NOTE(review): model receives the deployment name, same value as embeddings_deployment
+  storage_account_name       = module.st.storage_account_name
+  storage_container_name     = module.st.storage_container_name
+  search_service_name        = module.search.search_service_name
+  search_index_name          = module.search.search_index_name
+  openai_endpoint            = var.enable_apim ? module.apim.gateway_url : module.openai.openai_endpoint # APIM gateway URL when var.enable_apim is true, otherwise the OpenAI endpoint
+  chat_fqdn                  = module.ca_chat.fqdn # fqdn output of the ca_chat module
+  pbi_report_link            = var.pbi_report_link
+  content_safety_endpoint    = module.cog.content_safety_endpoint
+  content_safety_key         = module.cog.content_safety_key
+  cognitive_service_endpoint = module.cog.cognitive_service_endpoint
+  cognitive_service_key      = module.cog.cognitive_service_key
+  speech_key                 = module.cog.speech_key
+
+  storage_connection_string = module.st.connection_string
+  bing_key                  = var.bing_key # supplied as an input variable, unlike the other keys, which come from module.cog
+
+  tenant_id                      = data.azurerm_subscription.current.tenant_id
+  managed_identity_client_id     = module.mi.client_id
+  enable_entra_id_authentication = var.enable_entra_id_authentication
+}
diff --git a/infra/modules/apim/main.tf b/infra/modules/apim/main.tf
index d283524..7f09a66 100644
--- a/infra/modules/apim/main.tf
+++ b/infra/modules/apim/main.tf
@@ -3,6 +3,18 @@ locals {
   backend_url = "${var.openai_service_endpoint}openai"
 }
 
+resource "azurerm_public_ip" "apim_public_ip" {
+  count               = var.enable_apim ? 1 : 0 # created only when var.enable_apim is true, same condition as the API Management resource
+  name                = "pip-apim" # fixed name; referenced by azurerm_api_management.apim through public_ip_address_id
+  location            = var.location
+  resource_group_name = var.resource_group_name
+  allocation_method   = "Static"
+  sku                 = "Standard"
+  ip_tags             = {}
+  zones               = ["1", "2", "3"] # placed in availability zones 1, 2 and 3
+  domain_name_label   = var.apim_name # DNS label reuses the APIM name
+}
+
 resource "azurerm_api_management" "apim" {
   count                = var.enable_apim ? 1 : 0
   name                 = var.apim_name
@@ -12,6 +24,7 @@ resource "azurerm_api_management" "apim" {
   publisher_email      = var.publisher_email
   sku_name             = "Developer_1"
   virtual_network_type = "External" # Use "Internal" for a fully private APIM
+  public_ip_address_id = azurerm_public_ip.apim_public_ip[0].id // Required to deploy APIM in STv2 platform
 
   virtual_network_configuration {
     subnet_id = var.apim_subnet_id
@@ -31,7 +44,6 @@ resource "azurerm_api_management_backend" "openai" {
   }
 }
 
-// TODO: https://learn.microsoft.com/en-us/azure/api-management/api-management-howto-log-event-hubs?tabs=bicep#logger-with-system-assigned-managed-identity-credentialss
 resource "azurerm_api_management_logger" "appi_logger" {
   count               = var.enable_apim ? 1 : 0
   name                = local.logger_name
diff --git a/infra/modules/ca-back/auth_config.tf b/infra/modules/ca-aihub/auth_config.tf
similarity index 93%
rename from infra/modules/ca-back/auth_config.tf
rename to infra/modules/ca-aihub/auth_config.tf
index d5fee17..f487b6f 100644
--- a/infra/modules/ca-back/auth_config.tf
+++ b/infra/modules/ca-aihub/auth_config.tf
@@ -62,7 +62,6 @@ resource "null_resource" "update_redirect_uris" {
     azapi_resource.current
   ]
   triggers = {
-    input_json                   = md5(local.update_redirect_uris_command)
-    update_redirect_uris_command = local.update_redirect_uris_command
+    always_run = timestamp()
   }
 }
diff --git a/infra/modules/ca-aihub/main.tf b/infra/modules/ca-aihub/main.tf
new file mode 100644
index 0000000..5159ea8
--- /dev/null
+++ b/infra/modules/ca-aihub/main.tf
@@ -0,0 +1,208 @@
+resource "azapi_resource" "ca_back" { # Container App that runs the "aihub" container image.
+  name      = var.ca_name
+  location  = var.location
+  parent_id = var.resource_group_id
+  type      = "Microsoft.App/containerApps@2022-11-01-preview"
+  identity { # The app runs under the supplied user-assigned managed identity.
+    type = "UserAssigned"
+    identity_ids = [
+      var.managed_identity_id
+    ]
+  }
+
+  body = jsonencode({ # ARM request body, serialized to JSON for the azapi provider.
+    properties : {
+      managedEnvironmentId = "${var.cae_id}"
+      configuration = {
+        secrets = [ # Secrets are referenced from the env list below through secretRef.
+          {
+            name  = "microsoft-provider-authentication-secret"
+            value = "${var.enable_entra_id_authentication ? module.sp[0].password : "None"}" # Literal "None" when Entra ID auth is disabled.
+          },
+          {
+            name  = "content-safety-key"
+            value = "${var.content_safety_key}"
+          },
+          {
+            name  = "cognitive-service-key",
+            value = "${var.cognitive_service_key}"
+          },
+          {
+            name  = "speech-key",
+            value = "${var.speech_key}"
+          },
+          {
+            name  = "storage-connection-string"
+            value = "${var.storage_connection_string}"
+          },
+          {
+            name  = "bing-key"
+            value = "${var.bing_key}"
+          }
+        ]
+        ingress = { # External ingress on port 8080; all traffic goes to the latest revision.
+          external   = true
+          targetPort = 8080
+          transport  = "Http"
+
+          traffic = [
+            {
+              latestRevision = true
+              weight         = 100
+            }
+          ]
+        }
+        dapr = {
+          enabled = false
+        }
+      }
+      template = {
+        containers = [
+          {
+            name  = "aihub"
+            image = "ghcr.io/azure/aihub/aihub:1.0.0-preview.0"
+            resources = {
+              cpu    = 0.5
+              memory = "1Gi"
+            }
+            env = [ # Application settings; "__" separates nested configuration sections in the names.
+              {
+                name  = "Logging__LogLevel__Default",
+                value = "Information"
+              },
+              {
+                name  = "Logging__LogLevel__Microsoft.AspNetCore",
+                value = "Warning"
+              },
+              {
+                name  = "ContentModerator__Endpoint",
+                value = "${var.content_safety_endpoint}"
+              },
+              {
+                name      = "ContentModerator__SubscriptionKey",
+                secretRef = "content-safety-key"
+              },
+              {
+                name  = "BrandAnalyzer__BingEndpoint",
+                value = "https://api.bing.microsoft.com/v7.0/search"
+              },
+              {
+                name      = "BrandAnalyzer__BingKey",
+                secretRef = "bing-key"
+              },
+              {
+                name  = "BrandAnalyzer__OpenAIEndpoint",
+                value = "${var.openai_endpoint}"
+              },
+              {
+                name  = "BrandAnalyzer__OpenAISubscriptionKey",
+                value = "" # NOTE(review): all OpenAI keys are left empty — presumably the managed identity is used instead; confirm.
+              },
+              {
+                name  = "CallCenter__OpenAIEndpoint",
+                value = "${var.openai_endpoint}"
+              },
+              {
+                name  = "CallCenter__OpenAISubscriptionKey",
+                value = ""
+              },
+              {
+                name  = "ImageAnalyzer__VisionEndpoint",
+                value = "${var.cognitive_service_endpoint}computervision/imageanalysis:analyze?api-version=2023-02-01-preview&features=denseCaptions&language=en&gender-neutral-caption=False"
+              },
+              {
+                name  = "ImageAnalyzer__OCREndpoint",
+                value = "${var.cognitive_service_endpoint}computervision/imageanalysis:analyze?api-version=2023-02-01-preview&features=read&gender-neutral-caption=False"
+              },
+              {
+                name      = "ImageAnalyzer__VisionSubscriptionKey",
+                secretRef = "cognitive-service-key"
+              },
+              {
+                name  = "ImageAnalyzer__OpenAIEndpoint",
+                value = "${var.openai_endpoint}"
+              },
+              {
+                name  = "ImageAnalyzer__OpenAISubscriptionKey",
+                value = ""
+              },
+              {
+                name  = "ImageAnalyzer__ContainerName",
+                value = "image-analyzer"
+              },
+              {
+                name  = "FormAnalyzer__FormRecogEndpoint",
+                value = "${var.cognitive_service_endpoint}formrecognizer/documentModels/prebuilt-layout:analyze?api-version=2023-07-31"
+              },
+              {
+                name      = "FormAnalyzer__FormRecogSubscriptionKey",
+                secretRef = "cognitive-service-key"
+              },
+              {
+                name  = "FormAnalyzer__OpenAIEndpoint",
+                value = "${var.openai_endpoint}"
+              },
+              {
+                name  = "FormAnalyzer__OpenAISubscriptionKey",
+                value = ""
+              },
+              {
+                name  = "FormAnalyzer__ContainerName",
+                value = "form-analyzer"
+              },
+              {
+                name      = "Storage__ConnectionString",
+                secretRef = "storage-connection-string"
+              },
+              {
+                name  = "Storage__ContainerName",
+                value = "image-moderator"
+              },
+              {
+                name  = "AudioTranscription__SpeechLocation",
+                value = "${var.location}" # Reuses the container app's own location for the speech service.
+              },
+              {
+                name      = "AudioTranscription__SpeechSubscriptionKey",
+                secretRef = "speech-key"
+              },
+              {
+                name  = "AudioTranscription__ContainerName",
+                value = "audio-files"
+              },
+              {
+                name  = "ChatOnYourData__Link",
+                value = "https://${var.chat_fqdn}"
+              },
+              {
+                name  = "PBIReport__Link",
+                value = "${var.pbi_report_link}"
+              },
+              {
+                name  = "AllowedHosts",
+                value = "*"
+              },
+              {
+                name  = "AZURE_TENANT_ID"
+                value = "${var.tenant_id}"
+              },
+              {
+                name  = "AZURE_CLIENT_ID"
+                value = "${var.managed_identity_client_id}"
+              },
+              {
+                name  = "ASPNETCORE_ENVIRONMENT"
+                value = "Development" # NOTE(review): deployed app runs in the "Development" environment — confirm this is intended.
+              }
+            ],
+          },
+        ]
+        scale = { # Pinned to exactly one replica.
+          minReplicas = 1
+          maxReplicas = 1
+        }
+      }
+    }
+  })
+  response_export_values = ["properties.configuration.ingress.fqdn"] # Makes the ingress FQDN readable from this resource's output.
+}
diff --git a/infra/modules/ca-back/outputs.tf b/infra/modules/ca-aihub/outputs.tf
similarity index 100%
rename from infra/modules/ca-back/outputs.tf
rename to infra/modules/ca-aihub/outputs.tf
diff --git a/infra/modules/ca-webapi/providers.tf b/infra/modules/ca-aihub/providers.tf
similarity index 92%
rename from infra/modules/ca-webapi/providers.tf
rename to infra/modules/ca-aihub/providers.tf
index 30260c4..c4fa93c 100644
--- a/infra/modules/ca-webapi/providers.tf
+++ b/infra/modules/ca-aihub/providers.tf
@@ -3,7 +3,7 @@ terraform {
   required_providers {
     azurerm = {
       source  = "hashicorp/azurerm"
-      version = "3.72.0"
+      version = "3.87.0"
     }
     azapi = {
       source = "Azure/azapi"
diff --git a/infra/modules/ca-webapi/variables.tf b/infra/modules/ca-aihub/variables.tf
similarity index 55%
rename from infra/modules/ca-webapi/variables.tf
rename to infra/modules/ca-aihub/variables.tf
index 54e5b95..8c684cc 100644
--- a/infra/modules/ca-webapi/variables.tf
+++ b/infra/modules/ca-aihub/variables.tf
@@ -2,13 +2,10 @@ variable "resource_group_id" {}
 variable "location" {}
 variable "ca_name" {}
 variable "cae_id" {}
-variable "cae_default_domain" {}
 variable "managed_identity_id" {}
 variable "managed_identity_client_id" {}
 variable "tenant_id" {}
 
-variable "ca_webapp_name" {}
-
 variable "storage_account_name" {}
 variable "storage_container_name" {}
 variable "search_service_name" {}
@@ -17,4 +14,17 @@ variable "chat_gpt_deployment" {}
 variable "chat_gpt_model" {}
 variable "embeddings_deployment" {}
 variable "embeddings_model" {}
-variable "openai_service_name" {}
+variable "openai_endpoint" {} # Value of the *__OpenAIEndpoint app settings.
+
+variable "chat_fqdn" {}                  # Host name used to build the ChatOnYourData link (https://<fqdn>).
+variable "pbi_report_link" {}            # Value of the PBIReport__Link app setting.
+variable "content_safety_endpoint" {}    # Value of the ContentModerator__Endpoint app setting.
+variable "content_safety_key" {}         # Stored as the "content-safety-key" container app secret.
+variable "cognitive_service_endpoint" {} # Base URL prefixed to the vision / form recognizer paths.
+variable "cognitive_service_key" {}      # Stored as the "cognitive-service-key" container app secret.
+variable "speech_key" {}                 # Stored as the "speech-key" container app secret.
+
+variable "storage_connection_string" {} # Stored as the "storage-connection-string" container app secret.
+variable "bing_key" {}                  # Stored as the "bing-key" container app secret.
+
+variable "enable_entra_id_authentication" {} # Toggles the service principal secret vs. the "None" placeholder.
diff --git a/infra/modules/ca-chat/auth_config.tf b/infra/modules/ca-chat/auth_config.tf
new file mode 100644
index 0000000..f487b6f
--- /dev/null
+++ b/infra/modules/ca-chat/auth_config.tf
@@ -0,0 +1,67 @@
+locals { # Ingress FQDN of the container app, decoded from the azapi JSON output.
+  redirect_fqdn = jsondecode(azapi_resource.ca_back.output).properties.configuration.ingress.fqdn
+}
+
+module "sp" { # Instantiates the ../sp module, named after the container app; only when Entra ID auth is enabled.
+  count   = var.enable_entra_id_authentication ? 1 : 0
+  source  = "../sp"
+  sp_name = var.ca_name
+}
+
+resource "azapi_resource" "current" { # Auth config ("Current") attached to the container app; only when Entra ID auth is enabled.
+  count     = var.enable_entra_id_authentication ? 1 : 0
+  type      = "Microsoft.App/containerApps/authConfigs@2023-05-01"
+  name      = "Current"
+  parent_id = azapi_resource.ca_back.id
+  timeouts {}
+  body = jsonencode({
+    properties = {
+      platform = {
+        enabled = true
+      }
+      globalValidation = { # Unauthenticated requests are redirected to the Azure AD login page.
+        redirectToProvider          = "azureactivedirectory"
+        unauthenticatedClientAction = "RedirectToLoginPage"
+      }
+      identityProviders = {
+        azureActiveDirectory = {
+          enabled           = true
+          isAutoProvisioned = true
+          registration = {
+            clientId                = "${module.sp[0].client_id}"
+            clientSecretSettingName = "microsoft-provider-authentication-secret" # Name of the container app secret holding the client secret.
+            openIdIssuer            = "https://sts.windows.net/${var.tenant_id}/v2.0"
+          }
+          validation = {
+            allowedAudiences = [
+              "api://${module.sp[0].client_id}"
+            ]
+          }
+        }
+      }
+      login = {
+        preserveUrlFragmentsForLogins = false
+      }
+    }
+  })
+}
+
+locals { # Azure CLI command that points the app registration's web redirect URI at this app's AAD callback; empty when auth is disabled.
+  fqdn                         = jsondecode(azapi_resource.ca_back.output).properties.configuration.ingress.fqdn
+  update_redirect_uris_command = var.enable_entra_id_authentication ? "az ad app update --id ${module.sp[0].client_id} --web-redirect-uris https://${local.fqdn}/.auth/login/aad/callback" : ""
+}
+
+resource "null_resource" "update_redirect_uris" { # Runs the redirect-URI update command locally; only when Entra ID auth is enabled.
+  count = var.enable_entra_id_authentication ? 1 : 0
+  provisioner "local-exec" {
+    command = local.update_redirect_uris_command
+  }
+  depends_on = [ # Wait for the service principal, the container app and its auth config.
+    module.sp,
+    azapi_resource.ca_back,
+    azapi_resource.current
+  ]
+  triggers = {
+    always_run = timestamp() # timestamp() differs on every run, so the command is re-executed on each apply.
+  }
+}
diff --git a/infra/modules/ca-back/main.tf b/infra/modules/ca-chat/main.tf
similarity index 98%
rename from infra/modules/ca-back/main.tf
rename to infra/modules/ca-chat/main.tf
index 65f491e..a1c251e 100644
--- a/infra/modules/ca-back/main.tf
+++ b/infra/modules/ca-chat/main.tf
@@ -80,7 +80,7 @@ resource "azapi_resource" "ca_back" {
               },
               {
                 name  = "AZURE_OPENAI_SERVICE"
-                value = "${var.openai_service_name}"
+                value = "${var.openai_endpoint}"
               },
               {
                 name  = "AZURE_TENANT_ID"
diff --git a/infra/modules/ca-chat/outputs.tf b/infra/modules/ca-chat/outputs.tf
new file mode 100644
index 0000000..c0e0a25
--- /dev/null
+++ b/infra/modules/ca-chat/outputs.tf
@@ -0,0 +1,4 @@
+output "fqdn" { # Ingress FQDN of the container app, decoded from the azapi JSON output.
+  value = jsondecode(azapi_resource.ca_back.output).properties.configuration.ingress.fqdn
+}
+
diff --git a/infra/modules/ca-webapp/providers.tf b/infra/modules/ca-chat/providers.tf
similarity index 92%
rename from infra/modules/ca-webapp/providers.tf
rename to infra/modules/ca-chat/providers.tf
index 30260c4..c4fa93c 100644
--- a/infra/modules/ca-webapp/providers.tf
+++ b/infra/modules/ca-chat/providers.tf
@@ -3,7 +3,7 @@ terraform {
   required_providers {
     azurerm = {
       source  = "hashicorp/azurerm"
-      version = "3.72.0"
+      version = "3.87.0"
     }
     azapi = {
       source = "Azure/azapi"
diff --git a/infra/modules/ca-back/variables.tf b/infra/modules/ca-chat/variables.tf
similarity index 93%
rename from infra/modules/ca-back/variables.tf
rename to infra/modules/ca-chat/variables.tf
index 99bfdb4..6ddb039 100644
--- a/infra/modules/ca-back/variables.tf
+++ b/infra/modules/ca-chat/variables.tf
@@ -14,6 +14,6 @@ variable "chat_gpt_deployment" {}
 variable "chat_gpt_model" {}
 variable "embeddings_deployment" {}
 variable "embeddings_model" {}
-variable "openai_service_name" {}
+variable "openai_endpoint" {}
 
 variable "enable_entra_id_authentication" {}
diff --git a/infra/modules/ca-prep-docs/main.tf b/infra/modules/ca-prep-docs/main.tf
new file mode 100644
index 0000000..34aa605
--- /dev/null
+++ b/infra/modules/ca-prep-docs/main.tf
@@ -0,0 +1,120 @@
+resource "azapi_resource" "ca_back" { # Container Apps job that runs the "aihub-prepdocs" image.
+  name      = var.ca_name
+  location  = var.location
+  parent_id = var.resource_group_id
+  type      = "Microsoft.App/jobs@2023-05-01"
+  identity { # The job runs under the supplied user-assigned managed identity.
+    type = "UserAssigned"
+    identity_ids = [
+      var.managed_identity_id
+    ]
+  }
+
+  body = jsonencode({ # ARM request body, serialized to JSON for the azapi provider.
+    properties : {
+      environmentId = "${var.cae_id}"
+      configuration = {
+        manualTriggerConfig = { # One replica per execution, one completion required.
+          parallelism            = 1
+          replicaCompletionCount = 1
+        }
+        secrets        = []
+        triggerType    = "Manual" # The job only runs when started explicitly.
+        replicaTimeout = 3600     # NOTE(review): presumably seconds (one hour) — confirm against the jobs API.
+      }
+      template = {
+        containers = [
+          {
+            name  = "aihub-prepdocs"
+            image = "ghcr.io/azure/activate-genai/aihub-prepdocs:1.0.0-preview.0"
+            resources = {
+              cpu    = 0.5
+              memory = "1Gi"
+            }
+            env = [
+              {
+                name  = "OPENAI_HOST"
+                value = "azure"
+              },
+              {
+                name  = "AZURE_OPENAI_EMB_DEPLOYMENT"
+                value = "text-embedding-ada-002" # Hard-coded deployment name, not taken from a variable.
+              },
+              {
+                name  = "AZURE_OPENAI_EMB_MODEL_NAME"
+                value = "text-embedding-ada-002"
+              },
+              {
+                name  = "AZURE_STORAGE_CONTAINER"
+                value = "content"
+              },
+              {
+                name  = "AZURE_SEARCH_INDEX"
+                value = "gptkbindex"
+              },
+              {
+                name  = "OPENAI_API_KEY"
+                value = "" # NOTE(review): left empty — presumably the managed identity is used instead; confirm.
+              },
+              {
+                name  = "OPENAI_ORGANIZATION"
+                value = ""
+              },
+              {
+                name  = "AZURE_RESOURCE_GROUP"
+                value = "${var.resource_group_name}"
+              },
+              {
+                name  = "AZURE_SUBSCRIPTION_ID"
+                value = "${var.subscription_id}"
+              },
+              {
+                name  = "AZURE_STORAGE_ACCOUNT"
+                value = "${var.storage_account_name}"
+              },
+              {
+                name  = "AZURE_SEARCH_SERVICE"
+                value = "${var.search_service_name}"
+              },
+              {
+                name  = "AZURE_OPENAI_SERVICE"
+                value = "${var.openai_service_name}"
+              },
+              {
+                name  = "AZURE_TENANT_ID"
+                value = "${var.tenant_id}"
+              },
+              {
+                name  = "AZURE_CLIENT_ID"
+                value = "${var.managed_identity_client_id}"
+              },
+            ],
+            volumeMounts = [ # Mounts the staging volume declared below at /data.
+              {
+                volumeName = "staging-volume"
+                mountPath  = "/data"
+              }
+            ]
+          },
+        ]
+        volumes = [ # Azure Files volume backed by the environment storage defined in this module.
+          {
+            name        = "staging-volume"
+            storageName = "${azurerm_container_app_environment_storage.data.name}"
+            storageType = "AzureFile"
+          }
+        ]
+      }
+    }
+  })
+  response_export_values = ["properties.configuration.ingress.fqdn"] # NOTE(review): this job body defines no ingress, so this path is likely absent from the response — confirm it is needed.
+}
+
+resource "azurerm_container_app_environment_storage" "data" { # Registers the "staging" file share with the Container Apps environment.
+  name                         = "stagingstorage"
+  container_app_environment_id = var.cae_id
+  account_name                 = var.storage_account_name
+  share_name                   = "staging"
+  access_key                   = var.storage_account_key
+  access_mode                  = "ReadWrite"
+}
diff --git a/infra/modules/ca-back/providers.tf b/infra/modules/ca-prep-docs/providers.tf
similarity index 92%
rename from infra/modules/ca-back/providers.tf
rename to infra/modules/ca-prep-docs/providers.tf
index 30260c4..c4fa93c 100644
--- a/infra/modules/ca-back/providers.tf
+++ b/infra/modules/ca-prep-docs/providers.tf
@@ -3,7 +3,7 @@ terraform {
   required_providers {
     azurerm = {
       source  = "hashicorp/azurerm"
-      version = "3.72.0"
+      version = "3.87.0"
     }
     azapi = {
       source = "Azure/azapi"
diff --git a/infra/modules/ca-prep-docs/variables.tf b/infra/modules/ca-prep-docs/variables.tf
new file mode 100644
index 0000000..7f3f075
--- /dev/null
+++ b/infra/modules/ca-prep-docs/variables.tf
@@ -0,0 +1,13 @@
+variable "resource_group_name" {}        # Passed to the job as AZURE_RESOURCE_GROUP.
+variable "resource_group_id" {}          # parent_id of the job resource.
+variable "subscription_id" {}            # Passed to the job as AZURE_SUBSCRIPTION_ID.
+variable "location" {}                   # Location of the job resource.
+variable "ca_name" {}                    # Name of the job resource.
+variable "cae_id" {}                     # Container Apps environment hosting the job and its storage.
+variable "managed_identity_id" {}        # User-assigned identity attached to the job.
+variable "managed_identity_client_id" {} # Passed to the job as AZURE_CLIENT_ID.
+variable "tenant_id" {}                  # Passed to the job as AZURE_TENANT_ID.
+variable "storage_account_name" {}       # AZURE_STORAGE_ACCOUNT and the account of the staging share.
+variable "storage_account_key" {}        # Access key used to register the staging file share.
+variable "search_service_name" {}        # Passed to the job as AZURE_SEARCH_SERVICE.
+variable "openai_service_name" {}        # Passed to the job as AZURE_OPENAI_SERVICE.
diff --git a/infra/modules/ca-webapi/main.tf b/infra/modules/ca-webapi/main.tf
deleted file mode 100644
index 0386e29..0000000
--- a/infra/modules/ca-webapi/main.tf
+++ /dev/null
@@ -1,102 +0,0 @@
-resource "azapi_resource" "ca_webapi" {
-  name      = var.ca_name
-  location  = var.location
-  parent_id = var.resource_group_id
-  type      = "Microsoft.App/containerApps@2022-11-01-preview"
-  identity {
-    type = "UserAssigned"
-    identity_ids = [
-      var.managed_identity_id
-    ]
-  }
-
-  body = jsonencode({
-    properties : {
-      managedEnvironmentId = "${var.cae_id}"
-      configuration = {
-        secrets = []
-        ingress = {
-          external   = true
-          targetPort = 8080
-          transport  = "Http"
-
-          traffic = [
-            {
-              latestRevision = true
-              weight         = 100
-            }
-          ]
-          corsPolicy = {
-            allowedOrigins = [
-              "https://${var.ca_webapp_name}.${var.cae_default_domain}"
-            ]
-            allowedHeaders   = ["*"]
-            allowCredentials = false
-          }
-        }
-        dapr = {
-          enabled = false
-        }
-      }
-      template = {
-        containers = [
-          {
-            name  = "chat-copilot-webapi"
-            image = "cmendibl3/chat-copilot-webapi:0.1.0"
-            resources = {
-              cpu    = 0.5
-              memory = "1Gi"
-            }
-            env = [
-              {
-                name  = "Authentication__Type"
-                value = "None"
-              },
-              {
-                name  = "Planner__Model"
-                value = "${var.chat_gpt_model}"
-              },
-              {
-                name  = "SemanticMemory__Services__AzureOpenAIText__Endpoint"
-                value = "https://${var.openai_service_name}.openai.azure.com/"
-              },
-              {
-                name  = "SemanticMemory__Services__AzureOpenAIText__Deployment"
-                value = "${var.chat_gpt_model}"
-              },
-              {
-                name  = "SemanticMemory__Services__AzureOpenAIText__Auth"
-                value = "AzureIdentity"
-              },
-              {
-                name  = "SemanticMemory__Services__AzureOpenAIEmbedding__Endpoint"
-                value = "https://${var.openai_service_name}.openai.azure.com/"
-              },
-              {
-                name  = "SemanticMemory__Services__AzureOpenAIEmbedding__Deployment"
-                value = "${var.embeddings_model}"
-              },
-              {
-                name  = "SemanticMemory__Services__AzureOpenAIEmbedding__Auth"
-                value = "AzureIdentity"
-              },
-              {
-                name  = "AZURE_TENANT_ID"
-                value = "${var.tenant_id}"
-              },
-              {
-                name  = "AZURE_CLIENT_ID"
-                value = "${var.managed_identity_client_id}"
-              },
-            ],
-          },
-        ]
-        scale = {
-          minReplicas = 1
-          maxReplicas = 1
-        }
-      }
-    }
-  })
-  response_export_values = ["properties.configuration.ingress.fqdn"]
-}
diff --git a/infra/modules/ca-webapi/outputs.tf b/infra/modules/ca-webapi/outputs.tf
deleted file mode 100644
index 6266cdb..0000000
--- a/infra/modules/ca-webapi/outputs.tf
+++ /dev/null
@@ -1,4 +0,0 @@
-output "fqdn" {
-  value = jsondecode(azapi_resource.ca_webapi.output).properties.configuration.ingress.fqdn
-}
-
diff --git a/infra/modules/ca-webapp/main.tf b/infra/modules/ca-webapp/main.tf
deleted file mode 100644
index 52c491d..0000000
--- a/infra/modules/ca-webapp/main.tf
+++ /dev/null
@@ -1,59 +0,0 @@
-resource "azapi_resource" "ca_webapp" {
-  name      = var.ca_name
-  location  = var.location
-  parent_id = var.resource_group_id
-  type      = "Microsoft.App/containerApps@2022-11-01-preview"
-  identity {
-    type = "UserAssigned"
-    identity_ids = [
-      var.managed_identity_id
-    ]
-  }
-
-  body = jsonencode({
-    properties : {
-      managedEnvironmentId = "${var.cae_id}"
-      configuration = {
-        secrets = []
-        ingress = {
-          external   = true
-          targetPort = 3000
-          transport  = "Http"
-
-          traffic = [
-            {
-              latestRevision = true
-              weight         = 100
-            }
-          ]
-        }
-        dapr = {
-          enabled = false
-        }
-      }
-      template = {
-        containers = [
-          {
-            name  = "chat-copilot-webapp"
-            image = "cmendibl3/chat-copilot-webapp:0.1.0"
-            resources = {
-              cpu    = 0.5
-              memory = "1Gi"
-            }
-            env = [
-              {
-                name  = "REACT_APP_BACKEND_URI"
-                value = "${var.backend_url}"
-              },
-            ],
-          },
-        ]
-        scale = {
-          minReplicas = 1
-          maxReplicas = 1
-        }
-      }
-    }
-  })
-  response_export_values = ["properties.configuration.ingress.fqdn"]
-}
diff --git a/infra/modules/ca-webapp/outputs.tf b/infra/modules/ca-webapp/outputs.tf
deleted file mode 100644
index 83b5bbb..0000000
--- a/infra/modules/ca-webapp/outputs.tf
+++ /dev/null
@@ -1,3 +0,0 @@
-output "fqdn" {
-  value = jsondecode(azapi_resource.ca_webapp.output).properties.configuration.ingress.fqdn
-}
diff --git a/infra/modules/ca-webapp/variables.tf b/infra/modules/ca-webapp/variables.tf
deleted file mode 100644
index d2c65e3..0000000
--- a/infra/modules/ca-webapp/variables.tf
+++ /dev/null
@@ -1,9 +0,0 @@
-variable "resource_group_id" {}
-variable "location" {}
-variable "ca_name" {}
-variable "cae_id" {}
-variable "managed_identity_id" {}
-variable "managed_identity_client_id" {}
-variable "tenant_id" {}
-
-variable "backend_url" {}
\ No newline at end of file
diff --git a/infra/modules/cae/providers.tf b/infra/modules/cae/providers.tf
index 30260c4..c4fa93c 100644
--- a/infra/modules/cae/providers.tf
+++ b/infra/modules/cae/providers.tf
@@ -3,7 +3,7 @@ terraform {
   required_providers {
     azurerm = {
       source  = "hashicorp/azurerm"
-      version = "3.72.0"
+      version = "3.87.0"
     }
     azapi = {
       source = "Azure/azapi"
diff --git a/infra/modules/cog/main.tf b/infra/modules/cog/main.tf
new file mode 100644
index 0000000..14b283d
--- /dev/null
+++ b/infra/modules/cog/main.tf
@@ -0,0 +1,56 @@
+resource "azurerm_cognitive_account" "content_safety" { # Content Safety account (S0) with public network access.
+  name                          = var.content_safety_name
+  kind                          = "ContentSafety"
+  sku_name                      = "S0"
+  location                      = var.location
+  resource_group_name           = var.resource_group_name
+  public_network_access_enabled = true
+  custom_subdomain_name         = var.content_safety_name # Subdomain reuses the account name.
+}
+
+resource "azurerm_cognitive_account" "cognitive" { # Multi-service Cognitive Services account (S0) with public network access.
+  name                          = var.cognitive_services_name
+  kind                          = "CognitiveServices"
+  sku_name                      = "S0"
+  location                      = var.location
+  resource_group_name           = var.resource_group_name
+  public_network_access_enabled = true
+  custom_subdomain_name         = var.cognitive_services_name # Subdomain reuses the account name.
+}
+
+resource "azurerm_cognitive_account" "speech" { # Speech Services account (S0) with public network access.
+  name                          = var.speech_name
+  kind                          = "SpeechServices"
+  sku_name                      = "S0"
+  location                      = var.location
+  resource_group_name           = var.resource_group_name
+  public_network_access_enabled = true
+  custom_subdomain_name         = var.speech_name # Subdomain reuses the account name.
+}
+
+resource "azapi_resource" "bing" { # Bing Search v7 account (S1), created through azapi in the "global" location.
+  name                      = var.bing_name
+  location                  = "global"
+  parent_id                 = var.resource_group_id
+  type                      = "Microsoft.Bing/accounts@2020-06-10"
+  schema_validation_enabled = false // Required for this service otherwise it will fail.
+
+  body = jsonencode({
+    kind = "Bing.Search.v7"
+    sku = {
+      name = "S1"
+    }
+    properties : {
+      statisticsEnabled = true
+    }
+  })
+  response_export_values = ["properties.endpoint"] # Makes the account endpoint readable from this resource's output.
+}
+
+
+
+# resource "azurerm_role_assignment" "openai_user" {
+#   scope                = azurerm_cognitive_account.openai.id
+#   role_definition_name = "Cognitive Services OpenAI User"
+#   principal_id         = var.principal_id
+# }
diff --git a/infra/modules/cog/outputs.tf b/infra/modules/cog/outputs.tf
new file mode 100644
index 0000000..5103240
--- /dev/null
+++ b/infra/modules/cog/outputs.tf
@@ -0,0 +1,25 @@
+# Endpoints and access keys of the Cognitive Services accounts created by this module.
+# Key outputs are flagged sensitive so Terraform redacts them in plan/apply output.
+
+output "content_safety_endpoint" {
+  value = azurerm_cognitive_account.content_safety.endpoint
+}
+
+output "content_safety_key" {
+  value     = azurerm_cognitive_account.content_safety.primary_access_key
+  sensitive = true
+}
+
+output "cognitive_service_endpoint" {
+  value = azurerm_cognitive_account.cognitive.endpoint
+}
+
+output "cognitive_service_key" {
+  value     = azurerm_cognitive_account.cognitive.primary_access_key
+  sensitive = true
+}
+
+output "speech_key" {
+  value     = azurerm_cognitive_account.speech.primary_access_key
+  sensitive = true
+}
diff --git a/infra/modules/cog/providers.tf b/infra/modules/cog/providers.tf
new file mode 100644
index 0000000..c4fa93c
--- /dev/null
+++ b/infra/modules/cog/providers.tf
@@ -0,0 +1,20 @@
+terraform { # Terraform and provider requirements for this module.
+  required_version = ">= 1.1.8"
+  required_providers {
+    azurerm = {
+      source  = "hashicorp/azurerm"
+      version = "3.87.0" # Pinned to an exact provider version.
+    }
+    azapi = { # NOTE(review): no version constraint is declared for azapi.
+      source = "Azure/azapi"
+    }
+  }
+}
+
+provider "azurerm" { # NOTE(review): provider configured inside a child module — confirm this is intended rather than inherited from the root.
+  features {
+    cognitive_account {
+      purge_soft_delete_on_destroy = true # Purge soft-deleted Cognitive Services accounts when they are destroyed.
+    }
+  }
+}
\ No newline at end of file
diff --git a/infra/modules/cog/variables.tf b/infra/modules/cog/variables.tf
new file mode 100644
index 0000000..4f81339
--- /dev/null
+++ b/infra/modules/cog/variables.tf
@@ -0,0 +1,8 @@
+variable "resource_group_name" {}     # Resource group of the three cognitive accounts.
+variable "resource_group_id" {}       # parent_id of the Bing account.
+variable "location" {}                # Location of the three cognitive accounts.
+variable "principal_id" {}            # Only referenced by the commented-out role assignment in main.tf.
+variable "content_safety_name" {}     # Name and custom subdomain of the Content Safety account.
+variable "cognitive_services_name" {} # Name and custom subdomain of the Cognitive Services account.
+variable "speech_name" {}             # Name and custom subdomain of the Speech account.
+variable "bing_name" {}               # Name of the Bing account.
diff --git a/infra/modules/nsg/nsg_apim.tf b/infra/modules/nsg/nsg_apim.tf
index cc3481d..9c72085 100644
--- a/infra/modules/nsg/nsg_apim.tf
+++ b/infra/modules/nsg/nsg_apim.tf
@@ -3,13 +3,26 @@ resource "azurerm_network_security_group" "nsg_apim" {
   location            = var.location
   resource_group_name = var.resource_group_name
 
+  # External Only
   security_rule {
-    name                       = "management-endpoint"
+    name                       = "allowanyhttpsinbound"
     priority                   = 100
     direction                  = "Inbound"
     access                     = "Allow"
     protocol                   = "Tcp"
     source_port_range          = "*"
+    destination_port_range     = "443"
+    source_address_prefix      = "Internet"
+    destination_address_prefix = "VirtualNetwork"
+  }
+
+  security_rule {
+    name                       = "management-endpoint"
+    priority                   = 200
+    direction                  = "Inbound"
+    access                     = "Allow"
+    protocol                   = "Tcp"
+    source_port_range          = "*"
     destination_port_range     = "3443"
     source_address_prefix      = "ApiManagement"
     destination_address_prefix = "VirtualNetwork"
@@ -17,7 +30,7 @@ resource "azurerm_network_security_group" "nsg_apim" {
 
   security_rule {
     name                       = "load-balancer"
-    priority                   = 200
+    priority                   = 210
     direction                  = "Inbound"
     access                     = "Allow"
     protocol                   = "Tcp"
@@ -28,14 +41,14 @@ resource "azurerm_network_security_group" "nsg_apim" {
   }
 
   security_rule {
-    name                       = "allowanyhttpsinbound"
-    priority                   = 310
+    name                       = "traffic-manager"
+    priority                   = 220
     direction                  = "Inbound"
     access                     = "Allow"
     protocol                   = "Tcp"
     source_port_range          = "*"
     destination_port_range     = "443"
-    source_address_prefix      = "*"
+    source_address_prefix      = "AzureTrafficManager"
     destination_address_prefix = "VirtualNetwork"
   }
 
@@ -74,6 +87,18 @@ resource "azurerm_network_security_group" "nsg_apim" {
     source_address_prefix      = "VirtualNetwork"
     destination_address_prefix = "AzureKeyVault"
   }
+
+  security_rule { # Outbound rule letting the VNet reach Azure Monitor on TCP 1886 and 443.
+    name                       = "publish-diagnostics" # Spelling corrected from "publish-diagnostcs".
+    priority                   = 400
+    direction                  = "Outbound"
+    access                     = "Allow"
+    protocol                   = "Tcp"
+    source_port_range          = "*"
+    destination_port_ranges    = ["1886", "443"]
+    source_address_prefix      = "VirtualNetwork"
+    destination_address_prefix = "AzureMonitor"
+  }
 }
 
 resource "azurerm_subnet_network_security_group_association" "nsg_apim_association" {
diff --git a/infra/modules/openai/main.tf b/infra/modules/openai/main.tf
index f26a5a0..862ca12 100644
--- a/infra/modules/openai/main.tf
+++ b/infra/modules/openai/main.tf
@@ -24,6 +24,22 @@ resource "azurerm_cognitive_deployment" "gpt_35_turbo" {
   }
 }
 
+resource "azurerm_cognitive_deployment" "demo_build" { # gpt-35-turbo (0301) deployment named "DemoBuild" on the module's OpenAI account.
+  name                 = "DemoBuild"
+  cognitive_account_id = azurerm_cognitive_account.openai.id
+  rai_policy_name      = "Microsoft.Default"
+  model {
+    format  = "OpenAI"
+    name    = "gpt-35-turbo"
+    version = "0301"
+  }
+
+  scale {
+    type     = "Standard"
+    capacity = 120
+  }
+}
+
 resource "azurerm_cognitive_deployment" "embedding" {
   name                 = "text-embedding-ada-002"
   cognitive_account_id = azurerm_cognitive_account.openai.id
@@ -46,44 +62,3 @@ resource "azurerm_role_assignment" "openai_user" {
   principal_id         = var.principal_id
 }
 
-# resource "azurerm_cognitive_account" "secondary_openai" {
-#   name                          = var.azopenai_name
-#   kind                          = "OpenAI"
-#   sku_name                      = "S0"
-#   location                      = var.secondary_location
-#   resource_group_name           = var.resource_group_name
-#   public_network_access_enabled = true
-#   custom_subdomain_name         = var.azopenai_name
-# }
-
-# resource "azurerm_cognitive_deployment" "secondary_gpt_35_turbo" {
-#   name                 = "gpt-35-turbo"
-#   cognitive_account_id = azurerm_cognitive_account.secondary_openai.id
-#   rai_policy_name      = "Microsoft.Default"
-#   model {
-#     format  = "OpenAI"
-#     name    = "gpt-35-turbo"
-#     version = "0301"
-#   }
-
-#   scale {
-#     type     = "Standard"
-#     capacity = 120
-#   }
-# }
-
-# resource "azurerm_cognitive_deployment" "secondary_embedding" {
-#   name                 = "text-embedding-ada-002"
-#   cognitive_account_id = azurerm_cognitive_account.secondary_openai.id
-#   rai_policy_name      = "Microsoft.Default"
-#   model {
-#     format  = "OpenAI"
-#     name    = "text-embedding-ada-002"
-#     version = "2"
-#   }
-
-#   scale {
-#     type     = "Standard"
-#     capacity = 239
-#   }
-# }
diff --git a/infra/modules/openai/variables.tf b/infra/modules/openai/variables.tf
index bc34a0a..ba92b39 100644
--- a/infra/modules/openai/variables.tf
+++ b/infra/modules/openai/variables.tf
@@ -1,5 +1,4 @@
 variable "resource_group_name" {}
 variable "location" {}
-variable "secondary_location" {}
 variable "azopenai_name" {}
 variable "principal_id" {}
diff --git a/infra/modules/search/main.tf b/infra/modules/search/main.tf
index 2d2770b..19b6c7b 100644
--- a/infra/modules/search/main.tf
+++ b/infra/modules/search/main.tf
@@ -3,6 +3,7 @@ resource "azurerm_search_service" "search" {
   location            = var.location
   resource_group_name = var.resource_group_name
   sku                 = "standard"
+  semantic_search_sku = "free"
 
   local_authentication_enabled = false
 }
@@ -23,4 +24,4 @@ resource "azurerm_role_assignment" "search_service_contributor" {
   scope                = azurerm_search_service.search.id
   role_definition_name = "Search Service Contributor"
   principal_id         = var.principal_id
-}
\ No newline at end of file
+}
diff --git a/infra/modules/sp/main.tf b/infra/modules/sp/main.tf
index a47f547..e833e7a 100644
--- a/infra/modules/sp/main.tf
+++ b/infra/modules/sp/main.tf
@@ -1,5 +1,8 @@
 data "azurerm_client_config" "current" {}
 
+resource "random_uuid" "uuid" {
+}
+
 resource "azuread_application" "sp" {
   display_name    = var.sp_name
   identifier_uris = ["api://${var.sp_name}"]
@@ -22,7 +25,7 @@ resource "azuread_application" "sp" {
       admin_consent_description  = "Allow the application to access example on behalf of the signed-in user."
       admin_consent_display_name = "Allow the application to access example on behalf of the signed-in user."
       enabled                    = true
-      id                         = "96183846-204b-4b43-82e1-5d2222eb4b9b"
+      id                         = random_uuid.uuid.result
       type                       = "User"
       user_consent_description   = "Allow the application to access example on your behalf."
       user_consent_display_name  = "Allow the application to access example on behalf of the signed-in user."
diff --git a/infra/modules/sp/outputs.tf b/infra/modules/sp/outputs.tf
index 112ddc9..b6c3a43 100644
--- a/infra/modules/sp/outputs.tf
+++ b/infra/modules/sp/outputs.tf
@@ -13,3 +13,7 @@ output "id" {
 output "password" {
   value = azuread_service_principal_password.sp.value
 }
+
+output "redirect_uris_count" {
+  value = length(azuread_service_principal.sp.redirect_uris)
+}
diff --git a/infra/modules/st/main.tf b/infra/modules/st/main.tf
index c655611..74e9173 100644
--- a/infra/modules/st/main.tf
+++ b/infra/modules/st/main.tf
@@ -23,8 +23,39 @@ resource "azurerm_storage_container" "content" {
   storage_account_name  = azurerm_storage_account.sa.name
 }
 
-resource "azurerm_role_assignment" "storage_reader" {
+resource "azurerm_storage_container" "audio" {
+  name                  = "audio-files"
+  container_access_type = "private"
+  storage_account_name  = azurerm_storage_account.sa.name
+}
+
+resource "azurerm_storage_container" "form-analyzer" {
+  name                  = "form-analyzer"
+  container_access_type = "private"
+  storage_account_name  = azurerm_storage_account.sa.name
+}
+
+resource "azurerm_storage_container" "image-analyzer" {
+  name                  = "image-analyzer"
+  container_access_type = "private"
+  storage_account_name  = azurerm_storage_account.sa.name
+}
+
+resource "azurerm_storage_container" "image-moderator" {
+  name                  = "image-moderator"
+  container_access_type = "private"
+  storage_account_name  = azurerm_storage_account.sa.name
+}
+
+resource "azurerm_storage_share" "share" {
+  name                 = "staging"
+  storage_account_name = azurerm_storage_account.sa.name
+  quota                = 5
+}
+
+resource "azurerm_role_assignment" "storage_contributor" {
   scope                = azurerm_storage_account.sa.id
-  role_definition_name = "Storage Blob Data Reader"
+  role_definition_name = "Storage Blob Data Contributor"
   principal_id         = var.principal_id
 }
+
diff --git a/infra/modules/st/outputs.tf b/infra/modules/st/outputs.tf
index 4cc981f..a229fdc 100644
--- a/infra/modules/st/outputs.tf
+++ b/infra/modules/st/outputs.tf
@@ -4,4 +4,12 @@ output "storage_account_name" {
 
 output "storage_container_name" {
   value = azurerm_storage_container.content.name
-}
\ No newline at end of file
+}
+
+output "connection_string" {
+  value = azurerm_storage_account.sa.primary_connection_string
+}
+
+output "key" {
+  value = azurerm_storage_account.sa.primary_access_key
+}
diff --git a/infra/providers.tf b/infra/providers.tf
index 30260c4..c4fa93c 100644
--- a/infra/providers.tf
+++ b/infra/providers.tf
@@ -3,7 +3,7 @@ terraform {
   required_providers {
     azurerm = {
       source  = "hashicorp/azurerm"
-      version = "3.72.0"
+      version = "3.87.0"
     }
     azapi = {
       source = "Azure/azapi"
diff --git a/infra/variables.tf b/infra/variables.tf
index fb61ea2..5dbb7e3 100644
--- a/infra/variables.tf
+++ b/infra/variables.tf
@@ -3,11 +3,7 @@ variable "resource_group_name" {
 }
 
 variable "location" {
-  default = "West Europe"
-}
-
-variable "secondary_location" {
-  default = "North Europe"
+  default = "eastus"
 }
 
 variable "log_name" {
@@ -18,6 +14,22 @@ variable "azopenai_name" {
   default = "cog-openai-activate-genai"
 }
 
+variable "content_safety_name" {
+  default = "cog-content-safety-activate-genai"
+}
+
+variable "cognitive_services_name" {
+  default = "cog-cognitive-activate-genai"
+}
+
+variable "speech_name" {
+  default = "cog-speech-activate-genai"
+}
+
+variable "bing_name" {
+  default = "cog-bing-activate-genai"
+}
+
 variable "search_name" {
   default = "srch-activate-genai"
 }
@@ -30,10 +42,6 @@ variable "storage_account_name" {
   default = "stgenai"
 }
 
-variable "eventhub_name" {
-  default = "evh-activate-genai"
-}
-
 variable "apim_name" {
   default = "apim-activate-genai"
 }
@@ -45,6 +53,7 @@ variable "appi_name" {
 variable "publisher_name" {
   default = "contoso"
 }
+
 variable "publisher_email" {
   default = "admin@contoso.com"
 }
@@ -61,22 +70,34 @@ variable "cae_name" {
   default = "cae-activate-genai"
 }
 
-variable "ca_back_name" {
-  default = "ca-back-activate-genai"
+variable "ca_chat_name" {
+  default = "ca-chat-activate-genai"
+}
+
+variable "ca_prep_docs_name" {
+  default = "ca-prep-docs-activate-genai"
 }
 
-variable "ca_webapi_name" {
-  default = "ca-webapi-activate-genai"
+variable "ca_aihub_name" {
+  default = "ca-aihub-activate-genai"
 }
 
-variable "ca_webapp_name" {
-  default = "ca-webapp-activate-genai"
+variable "use_random_suffix" {
+  default = true
 }
 
 variable "enable_entra_id_authentication" {
-  default = false
+  default = true
 }
 
 variable "enable_apim" {
-  default = false
+  default = true
+}
+
+variable "bing_key" {
+  default = "<replace with a valid bing key>"
+}
+
+variable "pbi_report_link" {
+  default = ""
 }
diff --git a/scripts/Dockerfile b/scripts/Dockerfile
new file mode 100644
index 0000000..48ca968
--- /dev/null
+++ b/scripts/Dockerfile
@@ -0,0 +1,11 @@
+FROM mcr.microsoft.com/devcontainers/python:3.10
+
+RUN python3 -m venv ./scripts/.venv
+
+COPY ./ ./scripts/
+
+RUN chmod +x ./scripts/prepdocs.sh
+
+RUN ./scripts/.venv/bin/python -m pip install -r ./scripts/requirements.txt
+
+ENTRYPOINT ["./scripts/prepdocs.sh"]
diff --git a/scripts/prepdocs.py b/scripts/prepdocs.py
new file mode 100644
index 0000000..ad76beb
--- /dev/null
+++ b/scripts/prepdocs.py
@@ -0,0 +1,638 @@
+import argparse
+import base64
+import glob
+import html
+import io
+import os
+import re
+import time
+
+import openai
+import tiktoken
+from azure.ai.formrecognizer import DocumentAnalysisClient
+from azure.core.credentials import AzureKeyCredential
+from azure.identity import DefaultAzureCredential
+from azure.search.documents import SearchClient
+from azure.search.documents.indexes import SearchIndexClient
+from azure.search.documents.indexes.models import (
+    HnswParameters,
+    PrioritizedFields,
+    SearchableField,
+    SearchField,
+    SearchFieldDataType,
+    SearchIndex,
+    SemanticConfiguration,
+    SemanticField,
+    SemanticSettings,
+    SimpleField,
+    VectorSearch,
+    VectorSearchAlgorithmConfiguration,
+)
+from azure.storage.blob import BlobServiceClient
+from pypdf import PdfReader, PdfWriter
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_random_exponential,
+)
+
+args = argparse.Namespace(verbose=True, openaihost="azure")
+
+MAX_SECTION_LENGTH = 1000
+SENTENCE_SEARCH_LIMIT = 100
+SECTION_OVERLAP = 100
+
+open_ai_token_cache = {}
+CACHE_KEY_TOKEN_CRED = "openai_token_cred"
+CACHE_KEY_CREATED_TIME = "created_time"
+CACHE_KEY_TOKEN_TYPE = "token_type"
+
+# Embedding batch support section
+SUPPORTED_BATCH_AOAI_MODEL = {"text-embedding-ada-002": {"token_limit": 8100, "max_batch_size": 16}}
+
+
+def calculate_tokens_emb_aoai(input: str):
+    encoding = tiktoken.encoding_for_model(args.openaimodelname)
+    return len(encoding.encode(input))
+
+
+def blob_name_from_file_page(filename, page=0):
+    if os.path.splitext(filename)[1].lower() == ".pdf":
+        return os.path.splitext(os.path.basename(filename))[0] + f"-{page}" + ".pdf"
+    else:
+        return os.path.basename(filename)
+
+
+def upload_blobs(filename):
+    blob_service = BlobServiceClient(
+        account_url=f"https://{args.storageaccount}.blob.core.windows.net", credential=storage_creds
+    )
+    blob_container = blob_service.get_container_client(args.container)
+    if not blob_container.exists():
+        blob_container.create_container()
+
+    # if file is PDF split into pages and upload each page as a separate blob
+    if os.path.splitext(filename)[1].lower() == ".pdf":
+        reader = PdfReader(filename)
+        pages = reader.pages
+        for i in range(len(pages)):
+            blob_name = blob_name_from_file_page(filename, i)
+            if args.verbose:
+                print(f"\tUploading blob for page {i} -> {blob_name}")
+            f = io.BytesIO()
+            writer = PdfWriter()
+            writer.add_page(pages[i])
+            writer.write(f)
+            f.seek(0)
+            blob_container.upload_blob(blob_name, f, overwrite=True)
+    else:
+        blob_name = blob_name_from_file_page(filename)
+        with open(filename, "rb") as data:
+            blob_container.upload_blob(blob_name, data, overwrite=True)
+
+
+def remove_blobs(filename):
+    """Delete the per-page "<name>-<n>.pdf" blobs of `filename` from args.container, or every blob if it is None."""
+    if args.verbose:
+        print(f"Removing blobs for '{filename or '<all>'}'")
+    blob_service = BlobServiceClient(
+        account_url=f"https://{args.storageaccount}.blob.core.windows.net", credential=storage_creds
+    )
+    blob_container = blob_service.get_container_client(args.container)
+    if blob_container.exists():
+        if filename is None:
+            blobs = blob_container.list_blob_names()
+        else:
+            prefix = os.path.splitext(os.path.basename(filename))[0]
+            # escape the name so regex metacharacters in it (".", "(", "+", ...) are matched literally
+            page_blob = re.compile(rf"{re.escape(prefix)}-\d+\.pdf")
+            blobs = filter(page_blob.match, blob_container.list_blob_names(name_starts_with=prefix))
+        for b in blobs:
+            if args.verbose:
+                print(f"\tRemoving blob {b}")
+            blob_container.delete_blob(b)
+
+
+def table_to_html(table):
+    table_html = "<table>"
+    rows = [
+        sorted([cell for cell in table.cells if cell.row_index == i], key=lambda cell: cell.column_index)
+        for i in range(table.row_count)
+    ]
+    for row_cells in rows:
+        table_html += "<tr>"
+        for cell in row_cells:
+            tag = "th" if (cell.kind == "columnHeader" or cell.kind == "rowHeader") else "td"
+            cell_spans = ""
+            if cell.column_span > 1:
+                cell_spans += f" colSpan={cell.column_span}"
+            if cell.row_span > 1:
+                cell_spans += f" rowSpan={cell.row_span}"
+            table_html += f"<{tag}{cell_spans}>{html.escape(cell.content)}</{tag}>"
+        table_html += "</tr>"
+    table_html += "</table>"
+    return table_html
+
+
+def get_document_text(filename):
+    offset = 0
+    page_map = []
+    if args.localpdfparser:
+        reader = PdfReader(filename)
+        pages = reader.pages
+        for page_num, p in enumerate(pages):
+            page_text = p.extract_text()
+            page_map.append((page_num, offset, page_text))
+            offset += len(page_text)
+    else:
+        if args.verbose:
+            print(f"Extracting text from '{filename}' using Azure Form Recognizer")
+        form_recognizer_client = DocumentAnalysisClient(
+            endpoint=f"https://{args.formrecognizerservice}.cognitiveservices.azure.com/",
+            credential=formrecognizer_creds,
+            headers={"x-ms-useragent": "azure-search-chat-demo/1.0.0"},
+        )
+        with open(filename, "rb") as f:
+            poller = form_recognizer_client.begin_analyze_document("prebuilt-layout", document=f)
+        form_recognizer_results = poller.result()
+
+        for page_num, page in enumerate(form_recognizer_results.pages):
+            tables_on_page = [
+                table
+                for table in form_recognizer_results.tables
+                if table.bounding_regions[0].page_number == page_num + 1
+            ]
+
+            # mark all positions of the table spans in the page
+            page_offset = page.spans[0].offset
+            page_length = page.spans[0].length
+            table_chars = [-1] * page_length
+            for table_id, table in enumerate(tables_on_page):
+                for span in table.spans:
+                    # replace all table spans with "table_id" in table_chars array
+                    for i in range(span.length):
+                        idx = span.offset - page_offset + i
+                        if idx >= 0 and idx < page_length:
+                            table_chars[idx] = table_id
+
+            # build page text by replacing characters in table spans with table html
+            page_text = ""
+            added_tables = set()
+            for idx, table_id in enumerate(table_chars):
+                if table_id == -1:
+                    page_text += form_recognizer_results.content[page_offset + idx]
+                elif table_id not in added_tables:
+                    page_text += table_to_html(tables_on_page[table_id])
+                    added_tables.add(table_id)
+
+            page_text += " "
+            page_map.append((page_num, offset, page_text))
+            offset += len(page_text)
+
+    return page_map
+
+
+def split_text(page_map, filename):
+    SENTENCE_ENDINGS = [".", "!", "?"]
+    WORDS_BREAKS = [",", ";", ":", " ", "(", ")", "[", "]", "{", "}", "\t", "\n"]
+    if args.verbose:
+        print(f"Splitting '{filename}' into sections")
+
+    def find_page(offset):
+        num_pages = len(page_map)
+        for i in range(num_pages - 1):
+            if offset >= page_map[i][1] and offset < page_map[i + 1][1]:
+                return i
+        return num_pages - 1
+
+    all_text = "".join(p[2] for p in page_map)
+    length = len(all_text)
+    start = 0
+    end = length
+    while start + SECTION_OVERLAP < length:
+        last_word = -1
+        end = start + MAX_SECTION_LENGTH
+
+        if end > length:
+            end = length
+        else:
+            # Try to find the end of the sentence
+            while (
+                end < length
+                and (end - start - MAX_SECTION_LENGTH) < SENTENCE_SEARCH_LIMIT
+                and all_text[end] not in SENTENCE_ENDINGS
+            ):
+                if all_text[end] in WORDS_BREAKS:
+                    last_word = end
+                end += 1
+            if end < length and all_text[end] not in SENTENCE_ENDINGS and last_word > 0:
+                end = last_word  # Fall back to at least keeping a whole word
+        if end < length:
+            end += 1
+
+        # Try to find the start of the sentence or at least a whole word boundary
+        last_word = -1
+        while (
+            start > 0
+            and start > end - MAX_SECTION_LENGTH - 2 * SENTENCE_SEARCH_LIMIT
+            and all_text[start] not in SENTENCE_ENDINGS
+        ):
+            if all_text[start] in WORDS_BREAKS:
+                last_word = start
+            start -= 1
+        if all_text[start] not in SENTENCE_ENDINGS and last_word > 0:
+            start = last_word
+        if start > 0:
+            start += 1
+
+        section_text = all_text[start:end]
+        yield (section_text, find_page(start))
+
+        last_table_start = section_text.rfind("<table")
+        if last_table_start > 2 * SENTENCE_SEARCH_LIMIT and last_table_start > section_text.rfind("</table"):
+            # If the section ends with an unclosed table, we need to start the next section with the table.
+            # If table starts inside SENTENCE_SEARCH_LIMIT, we ignore it, as that will cause an infinite loop for tables longer than MAX_SECTION_LENGTH
+            # If last table starts inside SECTION_OVERLAP, keep overlapping
+            if args.verbose:
+                print(
+                    f"Section ends with unclosed table, starting next section with the table at page {find_page(start)} offset {start} table start {last_table_start}"
+                )
+            start = min(end - SECTION_OVERLAP, start + last_table_start)
+        else:
+            start = end - SECTION_OVERLAP
+
+    if start + SECTION_OVERLAP < end:
+        yield (all_text[start:end], find_page(start))
+
+
+def filename_to_id(filename):
+    filename_ascii = re.sub("[^0-9a-zA-Z_-]", "_", filename)
+    filename_hash = base64.b16encode(filename.encode("utf-8")).decode("ascii")
+    return f"file-{filename_ascii}-{filename_hash}"
+
+
+def create_sections(filename, page_map, use_vectors, embedding_deployment: str = None, embedding_model: str = None):
+    file_id = filename_to_id(filename)
+    for i, (content, pagenum) in enumerate(split_text(page_map, filename)):
+        section = {
+            "id": f"{file_id}-page-{i}",
+            "content": content,
+            "category": args.category,
+            "sourcepage": blob_name_from_file_page(filename, pagenum),
+            "sourcefile": filename,
+        }
+        if use_vectors:
+            section["embedding"] = compute_embedding(content, embedding_deployment, embedding_model)
+        yield section
+
+
+def before_retry_sleep(retry_state):
+    if args.verbose:
+        print("Rate limited on the OpenAI embeddings API, sleeping before retrying...")
+
+
+@retry(
+    retry=retry_if_exception_type(openai.error.RateLimitError),
+    wait=wait_random_exponential(min=15, max=60),
+    stop=stop_after_attempt(15),
+    before_sleep=before_retry_sleep,
+)
+def compute_embedding(text, embedding_deployment, embedding_model):
+    refresh_openai_token()
+    embedding_args = {"deployment_id": embedding_deployment} if args.openaihost == "azure" else {}
+    return openai.Embedding.create(**embedding_args, model=embedding_model, input=text)["data"][0]["embedding"]
+
+
+@retry(
+    retry=retry_if_exception_type(openai.error.RateLimitError),
+    wait=wait_random_exponential(min=15, max=60),
+    stop=stop_after_attempt(15),
+    before_sleep=before_retry_sleep,
+)
+def compute_embedding_in_batch(texts):
+    refresh_openai_token()
+    embedding_args = {"deployment_id": args.openaideployment} if args.openaihost == "azure" else {}
+    emb_response = openai.Embedding.create(**embedding_args, model=args.openaimodelname, input=texts)
+    return [data.embedding for data in emb_response.data]
+
+
+def create_search_index():
+    if args.verbose:
+        print(f"Ensuring search index {args.index} exists")
+    index_client = SearchIndexClient(
+        endpoint=f"https://{args.searchservice}.search.windows.net/", credential=search_creds
+    )
+    if args.index not in index_client.list_index_names():
+        index = SearchIndex(
+            name=args.index,
+            fields=[
+                SimpleField(name="id", type="Edm.String", key=True),
+                SearchableField(name="content", type="Edm.String", analyzer_name="en.microsoft"),
+                SearchField(
+                    name="embedding",
+                    type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+                    hidden=False,
+                    searchable=True,
+                    filterable=False,
+                    sortable=False,
+                    facetable=False,
+                    vector_search_dimensions=1536,
+                    vector_search_configuration="default",
+                ),
+                SimpleField(name="category", type="Edm.String", filterable=True, facetable=True),
+                SimpleField(name="sourcepage", type="Edm.String", filterable=True, facetable=True),
+                SimpleField(name="sourcefile", type="Edm.String", filterable=True, facetable=True),
+            ],
+            semantic_settings=SemanticSettings(
+                configurations=[
+                    SemanticConfiguration(
+                        name="default",
+                        prioritized_fields=PrioritizedFields(
+                            title_field=None, prioritized_content_fields=[SemanticField(field_name="content")]
+                        ),
+                    )
+                ]
+            ),
+            vector_search=VectorSearch(
+                algorithm_configurations=[
+                    VectorSearchAlgorithmConfiguration(
+                        name="default", kind="hnsw", hnsw_parameters=HnswParameters(metric="cosine")
+                    )
+                ]
+            ),
+        )
+        if args.verbose:
+            print(f"Creating {args.index} search index")
+        index_client.create_index(index)
+    else:
+        if args.verbose:
+            print(f"Search index {args.index} already exists")
+
+
+def update_embeddings_in_batch(sections):
+    """Embed sections in batches (bounded by token and item limits) and yield each one with its "embedding" set."""
+    batch_queue = []
+    copy_s = []
+    batch_response = {}
+    token_count = 0
+    for s in sections:
+        copy_s.append(s)  # record every section, including the one that later opens a new batch
+        token_count += calculate_tokens_emb_aoai(s["content"])
+        if (
+            token_count <= SUPPORTED_BATCH_AOAI_MODEL[args.openaimodelname]["token_limit"]
+            and len(batch_queue) < SUPPORTED_BATCH_AOAI_MODEL[args.openaimodelname]["max_batch_size"]
+        ):
+            batch_queue.append(s)
+        else:
+            emb_responses = compute_embedding_in_batch([item["content"] for item in batch_queue])
+            if args.verbose:
+                print(f"Batch Completed. Batch size  {len(batch_queue)} Token count {token_count}")
+            for emb, item in zip(emb_responses, batch_queue):
+                batch_response[item["id"]] = emb
+            batch_queue = [s]  # the section that did not fit starts the next batch
+            token_count = calculate_tokens_emb_aoai(s["content"])
+
+    if batch_queue:
+        emb_responses = compute_embedding_in_batch([item["content"] for item in batch_queue])
+        if args.verbose:
+            print(f"Batch Completed. Batch size  {len(batch_queue)} Token count {token_count}")
+        for emb, item in zip(emb_responses, batch_queue):
+            batch_response[item["id"]] = emb
+
+    for s in copy_s:
+        s["embedding"] = batch_response[s["id"]]
+        yield s
+
+
+def index_sections(filename, sections):
+    if args.verbose:
+        print(f"Indexing sections from '{filename}' into search index '{args.index}'")
+    search_client = SearchClient(
+        endpoint=f"https://{args.searchservice}.search.windows.net/", index_name=args.index, credential=search_creds
+    )
+    i = 0
+    batch = []
+    for s in sections:
+        batch.append(s)
+        i += 1
+        if i % 1000 == 0:
+            results = search_client.upload_documents(documents=batch)
+            succeeded = sum([1 for r in results if r.succeeded])
+            if args.verbose:
+                print(f"\tIndexed {len(results)} sections, {succeeded} succeeded")
+            batch = []
+
+    if len(batch) > 0:
+        results = search_client.upload_documents(documents=batch)
+        succeeded = sum([1 for r in results if r.succeeded])
+        if args.verbose:
+            print(f"\tIndexed {len(results)} sections, {succeeded} succeeded")
+
+
+def remove_from_index(filename):
+    if args.verbose:
+        print(f"Removing sections from '{filename or '<all>'}' from search index '{args.index}'")
+    search_client = SearchClient(
+        endpoint=f"https://{args.searchservice}.search.windows.net/", index_name=args.index, credential=search_creds
+    )
+    while True:
+        filter = None if filename is None else f"sourcefile eq '{os.path.basename(filename)}'"
+        r = search_client.search("", filter=filter, top=1000, include_total_count=True)
+        if r.get_count() == 0:
+            break
+        r = search_client.delete_documents(documents=[{"id": d["id"]} for d in r])
+        if args.verbose:
+            print(f"\tRemoved {len(r)} sections from index")
+        # It can take a few seconds for search results to reflect changes, so wait a bit
+        time.sleep(2)
+
+
+def refresh_openai_token():
+    """
+    Refresh OpenAI token every 5 minutes
+    """
+    if (
+        CACHE_KEY_TOKEN_TYPE in open_ai_token_cache
+        and open_ai_token_cache[CACHE_KEY_TOKEN_TYPE] == "azure_ad"
+        and open_ai_token_cache[CACHE_KEY_CREATED_TIME] + 300 < time.time()
+    ):
+        token_cred = open_ai_token_cache[CACHE_KEY_TOKEN_CRED]
+        openai.api_key = token_cred.get_token("https://cognitiveservices.azure.com/.default").token
+        open_ai_token_cache[CACHE_KEY_CREATED_TIME] = time.time()
+
+
+def read_files(
+    path_pattern: str,
+    use_vectors: bool,
+    vectors_batch_support: bool,
+    embedding_deployment: str = None,
+    embedding_model: str = None,
+):
+    """
+    Recursively read directory structure under `path_pattern` and index (or, with --remove, delete) each file,
+    forwarding `embedding_deployment` / `embedding_model` unchanged to nested directories
+    """
+    for filename in glob.glob(path_pattern):
+        if args.verbose:
+            print(f"Processing '{filename}'")
+        if args.remove:
+            remove_blobs(filename)
+            remove_from_index(filename)
+        else:
+            if os.path.isdir(filename):
+                read_files(filename + "/*", use_vectors, vectors_batch_support, embedding_deployment, embedding_model)
+                continue
+            try:
+                if not args.skipblobs:
+                    upload_blobs(filename)
+                page_map = get_document_text(filename)
+                sections = create_sections(
+                    os.path.basename(filename),
+                    page_map,
+                    use_vectors and not vectors_batch_support,
+                    embedding_deployment,
+                    embedding_model,
+                )
+                if use_vectors and vectors_batch_support:
+                    sections = update_embeddings_in_batch(sections)
+                index_sections(os.path.basename(filename), sections)
+            except Exception as e:
+                print(f"\tGot an error while reading {filename} -> {e} --> skipping file")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Prepare documents by extracting content from PDFs, splitting content into sections, uploading to blob storage, and indexing in a search index.",
+        epilog="Example: prepdocs.py '..\data\*' --storageaccount myaccount --container mycontainer --searchservice mysearch --index myindex -v",
+    )
+    parser.add_argument("files", help="Files to be processed")
+    parser.add_argument(
+        "--category", help="Value for the category field in the search index for all sections indexed in this run"
+    )
+    parser.add_argument(
+        "--skipblobs", action="store_true", help="Skip uploading individual pages to Azure Blob Storage"
+    )
+    parser.add_argument("--storageaccount", help="Azure Blob Storage account name")
+    parser.add_argument("--container", help="Azure Blob Storage container name")
+    parser.add_argument(
+        "--storagekey",
+        required=False,
+        help="Optional. Use this Azure Blob Storage account key instead of the current user identity to login (use az login to set current user for Azure)",
+    )
+    parser.add_argument(
+        "--tenantid", required=False, help="Optional. Use this to define the Azure directory where to authenticate)"
+    )
+    parser.add_argument(
+        "--searchservice",
+        help="Name of the Azure Cognitive Search service where content should be indexed (must exist already)",
+    )
+    parser.add_argument(
+        "--index",
+        help="Name of the Azure Cognitive Search index where content should be indexed (will be created if it doesn't exist)",
+    )
+    parser.add_argument(
+        "--searchkey",
+        required=False,
+        help="Optional. Use this Azure Cognitive Search account key instead of the current user identity to login (use az login to set current user for Azure)",
+    )
+    parser.add_argument("--openaihost", help="Host of the API used to compute embeddings ('azure' or 'openai')")
+    parser.add_argument("--openaiservice", help="Name of the Azure OpenAI service used to compute embeddings")
+    parser.add_argument(
+        "--openaideployment",
+        help="Name of the Azure OpenAI model deployment for an embedding model ('text-embedding-ada-002' recommended)",
+    )
+    parser.add_argument(
+        "--openaimodelname", help="Name of the Azure OpenAI embedding model ('text-embedding-ada-002' recommended)"
+    )
+    parser.add_argument(
+        "--novectors",
+        action="store_true",
+        help="Don't compute embeddings for the sections (e.g. don't call the OpenAI embeddings API during indexing)",
+    )
+    parser.add_argument(
+        "--disablebatchvectors", action="store_true", help="Don't compute embeddings in batch for the sections"
+    )
+    parser.add_argument(
+        "--openaikey",
+        required=False,
+        help="Optional. Use this Azure OpenAI account key instead of the current user identity to login (use az login to set current user for Azure). This is required only when using non-Azure endpoints.",
+    )
+    parser.add_argument("--openaiorg", required=False, help="This is required only when using non-Azure endpoints.")
+    parser.add_argument(
+        "--remove",
+        action="store_true",
+        help="Remove references to this document from blob storage and the search index",
+    )
+    parser.add_argument(
+        "--removeall",
+        action="store_true",
+        help="Remove all blobs from blob storage and documents from the search index",
+    )
+    parser.add_argument(
+        "--localpdfparser",
+        action="store_true",
+        help="Use PyPdf local PDF parser (supports only digital PDFs) instead of Azure Form Recognizer service to extract text, tables and layout from the documents",
+    )
+    parser.add_argument(
+        "--formrecognizerservice",
+        required=False,
+        help="Optional. Name of the Azure Form Recognizer service which will be used to extract text, tables and layout from the documents (must exist already)",
+    )
+    parser.add_argument(
+        "--formrecognizerkey",
+        required=False,
+        help="Optional. Use this Azure Form Recognizer account key instead of the current user identity to login (use az login to set current user for Azure)",
+    )
+
+    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
+    args = parser.parse_args()
+
+    # Use the current user identity to connect to Azure services unless a key is explicitly set for any of them
+    azd_credential = DefaultAzureCredential()
+    default_creds = azd_credential if args.searchkey is None or args.storagekey is None else None
+    search_creds = default_creds if args.searchkey is None else AzureKeyCredential(args.searchkey)
+    use_vectors = not args.novectors
+    compute_vectors_in_batch = not args.disablebatchvectors and args.openaimodelname in SUPPORTED_BATCH_AOAI_MODEL
+
+    if not args.skipblobs:
+        storage_creds = default_creds if args.storagekey is None else args.storagekey
+    if not args.localpdfparser:
+        # check if Azure Form Recognizer credentials are provided
+        if args.formrecognizerservice is None:
+            print(
+                "Error: Azure Form Recognizer service is not provided. Please provide formrecognizerservice or use --localpdfparser for local pypdf parser."
+            )
+            exit(1)
+        formrecognizer_creds = (
+            default_creds if args.formrecognizerkey is None else AzureKeyCredential(args.formrecognizerkey)
+        )
+
+    if use_vectors:
+        if args.openaihost == "azure":
+            if not args.openaikey:
+                openai.api_key = azd_credential.get_token("https://cognitiveservices.azure.com/.default").token
+                openai.api_type = "azure_ad"
+                open_ai_token_cache[CACHE_KEY_CREATED_TIME] = time.time()
+                open_ai_token_cache[CACHE_KEY_TOKEN_CRED] = azd_credential
+                open_ai_token_cache[CACHE_KEY_TOKEN_TYPE] = "azure_ad"
+            else:
+                openai.api_key = args.openaikey
+                openai.api_type = "azure"
+            openai.api_base = f"https://{args.openaiservice}.openai.azure.com"
+            openai.api_version = "2023-05-15"
+        else:
+            print("using normal openai")
+            openai.api_key = args.openaikey
+            openai.organization = args.openaiorg
+            openai.api_type = "openai"
+
+    if args.removeall:
+        remove_blobs(None)
+        remove_from_index(None)
+    else:
+        if not args.remove:
+            create_search_index()
+
+        print("Processing files...")
+        read_files(args.files, use_vectors, compute_vectors_in_batch, args.openaideployment, args.openaimodelname)
diff --git a/scripts/prepdocs.sh b/scripts/prepdocs.sh
new file mode 100644
index 0000000..d937b08
--- /dev/null
+++ b/scripts/prepdocs.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+echo 'Running "prepdocs.py"'
+./scripts/.venv/bin/python ./scripts/prepdocs.py '/data/*' --storageaccount "$AZURE_STORAGE_ACCOUNT" --container "$AZURE_STORAGE_CONTAINER" --searchservice "$AZURE_SEARCH_SERVICE" --openaihost "$OPENAI_HOST" --openaiservice "$AZURE_OPENAI_SERVICE" --openaikey "$OPENAI_API_KEY" --openaiorg "$OPENAI_ORGANIZATION" --openaideployment "$AZURE_OPENAI_EMB_DEPLOYMENT" --openaimodelname "$AZURE_OPENAI_EMB_MODEL_NAME" --index "$AZURE_SEARCH_INDEX" --formrecognizerservice "$AZURE_FORMRECOGNIZER_SERVICE" --tenantid "$AZURE_TENANT_ID" --localpdfparser -v
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
new file mode 100644
index 0000000..2d22cfc
--- /dev/null
+++ b/scripts/requirements.txt
@@ -0,0 +1,8 @@
+pypdf==3.9.0
+azure-identity==1.13.0
+azure-search-documents==11.4.0b6
+azure-ai-formrecognizer==3.2.1
+azure-storage-blob==12.14.1
+openai[datalib]==0.27.8
+tiktoken==0.4.0
+tenacity==8.2.2
\ No newline at end of file