[Integration][Bitbucket] Added support for file kind and file entity processing #1517


Merged: 27 commits from PORT-13527-add-support-for-file-kind-latest into main on Apr 8, 2025.

Commits
6b3b647  Added support for file kind (oiadebayo, Mar 18, 2025)
a013fbb  Fixed issue with parsed result type (oiadebayo, Mar 24, 2025)
72b5b12  Improved path matching and added test (oiadebayo, Mar 26, 2025)
0f27425  Added file entity processing (oiadebayo, Mar 26, 2025)
52dbd2c  Fixed client and test (oiadebayo, Mar 26, 2025)
ae213f2  fix lint (oiadebayo, Mar 26, 2025)
da1d3f7  Merge branch 'main' into PORT-13527-add-support-for-file-kind-latest (oiadebayo, Mar 26, 2025)
906aa3a  Merge branch 'main' into PORT-13527-add-support-for-file-kind-latest (oiadebayo, Mar 27, 2025)
6067b5b  Merge branch 'main' into PORT-13527-add-support-for-file-kind-latest (oiadebayo, Mar 27, 2025)
220edf6  Merge branch 'main' into PORT-13527-add-support-for-file-kind-latest (oiadebayo, Mar 27, 2025)
518263f  Updated file kind to use search api (oiadebayo, Mar 28, 2025)
a95d01b  Merge branch 'main' into PORT-13527-add-support-for-file-kind-latest (oiadebayo, Apr 2, 2025)
87d23f3  Merge branch 'main' into PORT-13527-add-support-for-file-kind-latest (oiadebayo, Apr 4, 2025)
6f23584  Attended to comments (oiadebayo, Apr 4, 2025)
5f3b207  Remove default max depth (oiadebayo, Apr 6, 2025)
0d18346  Merge branch 'main' into PORT-13527-add-support-for-file-kind-latest (oiadebayo, Apr 8, 2025)
485c5a8  Attended to review comment (oiadebayo, Apr 8, 2025)
143acd1  Fixed failing test (oiadebayo, Apr 8, 2025)
f8e3bc2  Update poetry.lock (oiadebayo, Apr 8, 2025)
d4947c6  Update integrations/bitbucket-cloud/bitbucket_cloud/helpers/file_kind.py (oiadebayo, Apr 8, 2025)
0dfc2d3  Attended to review comments (oiadebayo, Apr 8, 2025)
cedddaf  fixing lint (oiadebayo, Apr 8, 2025)
da62ae3  Update CHANGELOG.md (oiadebayo, Apr 8, 2025)
1a1f4ac  Update port-app-config.yml (oiadebayo, Apr 8, 2025)
2006d8c  Attended to more comments (oiadebayo, Apr 8, 2025)
1b08008  Update pyproject.toml (oiadebayo, Apr 8, 2025)
db66b1c  Merge branch 'main' into PORT-13527-add-support-for-file-kind-latest (mk-armah, Apr 8, 2025)
1 change: 1 addition & 0 deletions port-app-config.yml
@@ -25,5 +25,6 @@ resources:
           properties:
             url: ".links.html.href"
             defaultBranch: .mainbranch.name
+            readme: file://README.md
           relations:
             project: '.project.uuid | gsub("[{-}]"; "")'
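The added readme mapping is the consumer-facing half of this PR: any mapped value beginning with file:// is resolved by the new FileEntityProcessor (shown further down) rather than by plain JQ. A minimal sketch of that resolution, assuming the Ocean framework dispatches values that carry a processor's prefix to that processor's _search method:

    # Hedged sketch: FileEntityProcessor and FILE_PROPERTY_PREFIX come from this
    # PR; dispatch-by-prefix is an assumption about the Ocean framework.
    processor = FileEntityProcessor(context)  # context: Ocean handler context (assumed)
    pattern = "file://README.md"
    if pattern.startswith(FILE_PROPERTY_PREFIX):
        # repo_payload is the repository entity being mapped (hypothetical);
        # _search strips the prefix and fetches README.md from the default branch.
        readme_text = await processor._search(repo_payload, pattern)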
8 changes: 8 additions & 0 deletions integrations/bitbucket-cloud/CHANGELOG.md
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 <!-- towncrier release notes start -->
 
+## 0.1.5 (2025-04-08)
+
+
+### Features
+
+- Added support for ingesting file kind and parsing JSON and YAML files
+
+
 ## 0.1.4 (2025-04-07)
57 changes: 46 additions & 11 deletions integrations/bitbucket-cloud/bitbucket_cloud/client.py
@@ -69,23 +69,22 @@ async def _send_api_request(
         params: Optional[dict[str, Any]] = None,
         json_data: Optional[dict[str, Any]] = None,
         method: str = "GET",
+        return_full_response: bool = False,
     ) -> Any:
         """Send request to Bitbucket API with error handling."""
         response = await self.client.request(
             method=method, url=url, params=params, json=json_data
         )
         try:
             response.raise_for_status()
-            return response.json()
+            return response if return_full_response else response.json()
         except HTTPStatusError as e:
-            error_data = e.response.json()
-            error_message = error_data.get("error", {}).get("message", str(e))
             if e.response.status_code == 404:
-                logger.error(
-                    f"Requested resource not found: {url}; message: {error_message}"
+                logger.warning(
+                    f"Requested resource not found: {url}; message: {str(e)}"
                 )
                 return {}
-            logger.error(f"Bitbucket API error: {error_message}")
+            logger.error(f"Bitbucket API error: {str(e)}")
             raise e
         except HTTPError as e:
             logger.error(f"Failed to send {method} request to url {url}: {str(e)}")
@@ -166,13 +165,19 @@ async def get_repositories(
             yield repos
 
     async def get_directory_contents(
-        self, repo_slug: str, branch: str, path: str, max_depth: int = 2
+        self,
+        repo_slug: str,
+        branch: str,
+        path: str,
+        max_depth: int,
+        params: Optional[dict[str, Any]] = None,
     ) -> AsyncGenerator[list[dict[str, Any]], None]:
         """Get contents of a directory."""
-        params = {
-            "max_depth": max_depth,
-            "pagelen": PAGE_SIZE,
-        }
+        if params is None:
+            params = {
+                "max_depth": max_depth,
+                "pagelen": PAGE_SIZE,
+            }
         async for contents in self._fetch_paginated_api_with_rate_limiter(
             f"{self.base_url}/repositories/{self.workspace}/{repo_slug}/src/{branch}/{path}",
             params=params,
@@ -212,3 +217,33 @@ async def get_repository(self, repo_slug: str) -> dict[str, Any]:
         return await self._send_api_request(
             f"{self.base_url}/repositories/{self.workspace}/{repo_slug}"
         )
+
+    async def get_repository_files(self, repo: str, branch: str, path: str) -> Any:
+        """Get the content of a file."""
+        response = await self._send_api_request(
+            f"{self.base_url}/repositories/{self.workspace}/{repo}/src/{branch}/{path}",
+            method="GET",
+            return_full_response=True,
+        )
+        logger.info(f"Retrieved file content for {repo}/{branch}/{path}")
+        return response.text
+
+    async def search_files(
+        self,
+        search_query: str,
+    ) -> AsyncGenerator[list[dict[str, Any]], None]:
+        """Search for files using Bitbucket's search API."""
+        params = {
+            "pagelen": 300,
+            "search_query": search_query,
+            "fields": "+values.file.commit.repository.mainbranch.name",
+        }
+
+        async for results in self._send_paginated_api_request(
+            f"{self.base_url}/workspaces/{self.workspace}/search/code",
+            params=params,
+        ):
+            logger.info(
+                f"Fetched batch of {len(results)} matching files from workspace {self.workspace}"
+            )
+            yield results
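For orientation, a short sketch of how the two new client methods compose. This is not code from the PR: init_client() is the factory imported in the helper modules below, the repo and query values are hypothetical, and the result shape mirrors what retrieve_file_content (further down) reads.

    # Hedged usage sketch; repo/path names are hypothetical.
    client = init_client()

    async def list_port_files() -> None:
        # Find every port.yaml under a services/ path across the workspace.
        query = '"port.yaml" path:services ext:yaml'
        async for batch in client.search_files(query):
            for result in batch:
                file_info = result["file"]
                repo = file_info["commit"]["repository"]["name"]
                branch = file_info["commit"]["repository"]["mainbranch"]["name"]
                # Fetch the raw text of the matched file.
                text = await client.get_repository_files(repo, branch, file_info["path"])
                print(repo, file_info["path"], len(text))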
68 changes: 68 additions & 0 deletions (new FileEntityProcessor module)
@@ -0,0 +1,68 @@
+from typing import Any, Optional
+from loguru import logger
+from port_ocean.core.handlers import JQEntityProcessor
+from initialize_client import init_client
+
+
+FILE_PROPERTY_PREFIX = "file://"
+
+
+class FileEntityProcessor(JQEntityProcessor):
+    prefix = FILE_PROPERTY_PREFIX
+
+    async def _get_file_content(
+        self, repo_slug: str, ref: str, file_path: str
+    ) -> Optional[Any]:
+        """Helper method to fetch and process file content."""
+        try:
+            bitbucket_client = init_client()
+            return await bitbucket_client.get_repository_files(
+                repo_slug, ref, file_path
+            )
+        except Exception as e:
+            logger.error(
+                f"Failed to get file content for {file_path} in repository {repo_slug} in branch {ref}: {e}"
+            )
+            return None
+
+    async def _search(self, data: dict[str, Any], pattern: str) -> Any:
+        """
+        Search for a file in the repository and return its content.
+
+        Args:
+            data (dict[str, Any]): The data containing the repository information
+            pattern (str): The pattern to search for (e.g. "file://path/to/file.yaml")
+
+        For monorepo, the data should contain a "repo" key and a "folder" key with the repository information.
+        For non-monorepo, the data should contain the repository information directly.
+
+        Returns:
+            Any: The raw or parsed content of the file
+        """
+
+        repo_data = data.get("repo", data)
+        repo_slug = repo_data.get("name", "")
+        default_branch = repo_data.get("mainbranch", {}).get("name", "main")
+
+        if current_directory_path := data.get("folder", {}).get("path", ""):
+            file_path = f"{current_directory_path}/{pattern.replace(self.prefix, '')}"
+            ref = data.get("folder", {}).get("commit", {}).get("hash", default_branch)
+        else:
+            file_path = pattern.replace(self.prefix, "")
+            if not default_branch:
+                logger.info(
+                    f"No default branch found for repository {repo_slug} and file path {file_path}"
+                )
+                return None
+            ref = default_branch
+
+        if not repo_slug:
+            logger.info(
+                f"No repository slug found for branch {ref} and file path {file_path}"
+            )
+            return None
+
+        logger.info(
+            f"Searching for file {file_path} in Repository {repo_slug}, ref {ref}"
+        )
+        return await self._get_file_content(repo_slug, ref, file_path)
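The two payload shapes _search accepts look roughly like the following; both examples are hypothetical and trimmed to the keys the method actually reads.

    # Non-monorepo: the repository payload is passed directly.
    repo_payload = {"name": "my-repo", "mainbranch": {"name": "main"}}

    # Monorepo: a "repo" key plus a "folder" key pinning the path and commit.
    folder_payload = {
        "repo": {"name": "my-repo", "mainbranch": {"name": "main"}},
        "folder": {"path": "services/api", "commit": {"hash": "abc123"}},
    }

    # "file://port.yaml" resolves to services/api/port.yaml at commit abc123
    # for folder_payload, and to port.yaml on main for repo_payload.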
153 changes: 153 additions & 0 deletions integrations/bitbucket-cloud/bitbucket_cloud/helpers/file_kind.py
@@ -0,0 +1,153 @@
+import fnmatch
+import json
+from typing import Dict, List, Any, AsyncGenerator
+from loguru import logger
+import yaml
+from integration import BitbucketFilePattern
+from port_ocean.utils.async_iterators import stream_async_iterators_tasks
+from initialize_client import init_client
+
+
+JSON_FILE_SUFFIX = ".json"
+YAML_FILE_SUFFIX = (".yaml", ".yml")
+
+
+def build_search_terms(
+    filename: str, repos: List[str] | None, path: str, extension: str
+) -> str:
+    """
+    This function builds search terms for Bitbucket's search API.
+    The entire workspace is searched for the filename if repos is not provided.
+    If repos are provided, only the repos specified are searched.
+    The path and extension are required to tailor the search so results
+    are relevant to the file kind.
+
+    Args:
+        filename (str): The filename to search for.
+        repos (List[str] | None): The repositories to search in.
+        path (str): The path to search in.
+        extension (str): The extension to search for.
+
+    Returns:
+        str: The search terms for Bitbucket's search API.
+    """
+    search_terms = [f'"{filename}"']
+    if repos:
+        repo_filters = " ".join(f"repo:{repo}" for repo in repos)
+        search_terms.append(f"{repo_filters}")
+
+    search_terms.append(f"path:{path}")
+
+    if extension:
+        search_terms.append(f"ext:{extension}")
+
+    return " ".join(search_terms)
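A worked example of the query string this produces (repo and path values are hypothetical):

    query = build_search_terms(
        filename="port.yaml",
        repos=["repo-a", "repo-b"],
        path="services",
        extension="yaml",
    )
    # query == '"port.yaml" repo:repo-a repo:repo-b path:services ext:yaml'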


+async def process_file_patterns(
+    file_pattern: BitbucketFilePattern,
+) -> AsyncGenerator[List[Dict[str, Any]], None]:
+    """Process file patterns and retrieve matching files using Bitbucket's search API."""
+    logger.info(
+        f"Searching for files in {len(file_pattern.repos) if file_pattern.repos else 'all'} repositories with pattern: {file_pattern.path}"
+    )
+
+    if not file_pattern.repos:
+        logger.warning("No repositories provided, searching entire workspace")
+    if not file_pattern.path:
+        logger.info("Path is required, skipping file search")
+        return
+    if not file_pattern.filenames:
+        logger.info("No filenames provided, skipping file search")
+        return
+
+    for filename in file_pattern.filenames:
+        search_query = build_search_terms(
+            filename=filename,
+            repos=file_pattern.repos,
+            path=file_pattern.path,
+            extension=filename.split(".")[-1],
+        )
+        logger.debug(f"Constructed search query: {search_query}")
+        bitbucket_client = init_client()
+        async for search_results in bitbucket_client.search_files(search_query):
+            tasks = []
+            for result in search_results:
+                if len(result["path_matches"]) >= 1:
+                    file_info = result["file"]
+                    file_path = file_info["path"]
+
+                    if not validate_file_match(file_path, filename, file_pattern.path):
+                        logger.debug(
+                            f"Skipping file {file_path} as it doesn't match expected patterns"
+                        )
+                        continue
+
+                    tasks.append(retrieve_file_content(file_info))
+
+            async for file_results in stream_async_iterators_tasks(*tasks):
+                if not file_pattern.skip_parsing:
+                    file_results = parse_file(file_results)
+                yield [file_results]
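To make the selector concrete, here is a hedged sketch of driving process_file_patterns directly. The values are hypothetical, and only the four attributes the function reads (repos, path, filenames, skip_parsing) are assumed to be constructor arguments of BitbucketFilePattern.

    pattern = BitbucketFilePattern(
        repos=["repo-a"],         # None or empty searches the whole workspace
        path="services",          # required; an empty path skips the search
        filenames=["port.yaml"],  # one search query is issued per filename
        skip_parsing=False,       # False means JSON/YAML content gets parsed
    )

    async def collect() -> None:
        async for batch in process_file_patterns(pattern):
            for file_result in batch:
                print(file_result["repo"]["name"], file_result["metadata"]["path"])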


+async def retrieve_file_content(
+    file_info: Dict[str, Any],
+) -> AsyncGenerator[Dict[str, Any], None]:
+    """
+    Retrieve the content of a single file from Bitbucket.
+
+    Args:
+        file_info (Dict[str, Any]): Information about the file to retrieve
+
+    Yields:
+        Dict[str, Any]: Dictionary containing the file content and metadata
+    """
+    file_path = file_info.get("path", "")
+    repo_info = file_info["commit"]["repository"]
+    repo_slug = repo_info["name"]
+    branch = repo_info["mainbranch"]["name"]
+
+    logger.info(f"Retrieving contents for file: {file_path}")
+    bitbucket_client = init_client()
+    file_content = await bitbucket_client.get_repository_files(
+        repo_slug, branch, file_path
+    )
+
+    yield {
+        "content": file_content,
+        "repo": repo_info,
+        "branch": branch,
+        "metadata": file_info,
+    }


+def parse_file(file: Dict[str, Any]) -> Dict[str, Any]:
+    """Parse a file based on its extension."""
+    try:
+        file_path = file.get("metadata", {}).get("path", "")
+        file_content = file.get("content", "")
+        if file_path.endswith(JSON_FILE_SUFFIX):
+            loaded_file = json.loads(file_content)
+            file["content"] = loaded_file
+        elif file_path.endswith(YAML_FILE_SUFFIX):
+            loaded_file = yaml.safe_load(file_content)
+            file["content"] = loaded_file
+        return file
+    except Exception as e:
+        logger.error(f"Error parsing file: {e}")
+        return file
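For instance, a matched YAML file comes out of parse_file with its content replaced by the parsed structure (sample data is hypothetical):

    raw = {
        "metadata": {"path": "services/port.yaml"},
        "content": "name: api\nreplicas: 2\n",
    }
    parsed = parse_file(raw)
    # parsed["content"] == {"name": "api", "replicas": 2}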


+def validate_file_match(file_path: str, filename: str, expected_path: str) -> bool:
+    """Validate if the file path and filename match the expected patterns."""
+    if not file_path.endswith(filename):
+        return False
+
+    if (not expected_path or expected_path == "/") and file_path == filename:
+        return True
+
+    dir_path = file_path[: -len(filename)]
+    dir_path = dir_path.rstrip("/")
+    expected_path = expected_path.rstrip("/")
+    return fnmatch.fnmatch(dir_path, expected_path)
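Because the directory comparison goes through fnmatch, glob-style expected paths work; a few hypothetical checks:

    validate_file_match("services/api/port.yaml", "port.yaml", "services/*")  # True
    validate_file_match("services/api/port.yaml", "port.yaml", "services")    # False: the pattern must match the full directory
    validate_file_match("port.yaml", "port.yaml", "/")                        # True: repository root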
@@ -7,6 +7,7 @@ class ObjectKind(StrEnum):
     FOLDER = "folder"
     REPOSITORY = "repository"
     PULL_REQUEST = "pull-request"
+    FILE = "file"
 
 
 @dataclass
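The new FILE kind is what exposes all of the above to a resync. The PR's main module is not shown in this view, so the following is only a hedged sketch of the usual Ocean wiring, with a hypothetical inline selector standing in for the real resource config:

    from port_ocean.context.ocean import ocean
    from port_ocean.core.ocean_types import ASYNC_GENERATOR_RESYNC_TYPE

    @ocean.on_resync(ObjectKind.FILE)
    async def on_resync_files(kind: str) -> ASYNC_GENERATOR_RESYNC_TYPE:
        # Hypothetical selector; in practice it would come from the resync's
        # resource config rather than being constructed inline.
        pattern = BitbucketFilePattern(
            repos=None, path="services", filenames=["port.yaml"], skip_parsing=False
        )
        async for files in process_file_patterns(pattern):
            yield files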