From 5b87d81ce9ceff1ebc3411dd98b4a5cb25b83193 Mon Sep 17 00:00:00 2001 From: JK Date: Wed, 18 Mar 2026 21:53:50 +0900 Subject: [PATCH 01/19] =?UTF-8?q?confluence-mdx:=20R2=20=E2=80=94=20SyncPr?= =?UTF-8?q?ofile=20=EC=A0=95=EC=9D=98=20=EB=B0=8F=20Config=EC=97=90=20sync?= =?UTF-8?q?=5Fcode=20=ED=95=84=EB=93=9C=EB=A5=BC=20=EC=B6=94=EA=B0=80?= =?UTF-8?q?=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - bin/fetch/sync_profiles.py 신규 추가: SyncProfile 데이터클래스 및 SYNC_PROFILES 딕셔너리 정의 (qm, qcp) - bin/fetch/config.py: sync_code 필드 추가 (기본값: "qm") - bin/fetch/config.py: pages_yaml_filename property 추가 (pages.{sync_code}.yaml) Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/fetch/config.py | 6 ++++++ confluence-mdx/bin/fetch/sync_profiles.py | 25 +++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 confluence-mdx/bin/fetch/sync_profiles.py diff --git a/confluence-mdx/bin/fetch/config.py b/confluence-mdx/bin/fetch/config.py index c402f0508..1fb98c813 100644 --- a/confluence-mdx/bin/fetch/config.py +++ b/confluence-mdx/bin/fetch/config.py @@ -15,6 +15,7 @@ class Config: """Centralized configuration management""" base_url: str = "https://querypie.atlassian.net/wiki" space_key: str = "QM" # Confluence space key + sync_code: str = "qm" # Sync profile code (see fetch/sync_profiles.py) days: Optional[int] = None # Number of days to look back (None = auto-detect from .fetch_state.yaml) default_start_page_id: str = "608501837" # Root Page ID of "QueryPie Docs" (for breadcrumbs) quick_start_page_id: str = "544375784" # QueryPie Overview having less children @@ -26,6 +27,11 @@ class Config: download_attachments: bool = False mode: str = "recent" # Mode: "local", "remote", or "recent" + @property + def pages_yaml_filename(self) -> str: + """Filename for pages YAML, derived from sync_code.""" + return f"pages.{self.sync_code}.yaml" + def __post_init__(self): if self.email is None: 
self.email = os.environ.get('ATLASSIAN_USERNAME', 'your-email@example.com') diff --git a/confluence-mdx/bin/fetch/sync_profiles.py b/confluence-mdx/bin/fetch/sync_profiles.py new file mode 100644 index 000000000..c7eadf876 --- /dev/null +++ b/confluence-mdx/bin/fetch/sync_profiles.py @@ -0,0 +1,25 @@ +"""Sync profile definitions for each Confluence Space.""" + +from dataclasses import dataclass + + +@dataclass +class SyncProfile: + """Configuration for a single Confluence Space sync target.""" + code: str + space_key: str + start_page_id: str + + +SYNC_PROFILES: dict[str, SyncProfile] = { + "qm": SyncProfile( + code="qm", + space_key="QM", + start_page_id="608501837", # QueryPie Docs 루트 + ), + "qcp": SyncProfile( + code="qcp", + space_key="QCP", + start_page_id="", # TBD: QCP Space 루트 페이지 ID + ), +} From 27aa2d2544dce37ddb22c52578aaa5d847b9586d Mon Sep 17 00:00:00 2001 From: JK Date: Wed, 18 Mar 2026 21:54:27 +0900 Subject: [PATCH 02/19] =?UTF-8?q?confluence-mdx:=20R1=20=E2=80=94=20pages.?= =?UTF-8?q?yaml=20=ED=8C=8C=EC=9D=BC=EB=AA=85=EC=9D=84=20Space=20=EC=BD=94?= =?UTF-8?q?=EB=93=9C=20=EA=B8=B0=EB=B0=98=EC=9C=BC=EB=A1=9C=20=EB=B6=84?= =?UTF-8?q?=EB=A6=AC=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - bin/fetch/processor.py: output_yaml_path를 pages.{sync_code}.yaml로 변경 - bin/fetch_cli.py: --sync-code 인자 추가 (기본값: "qm"), SYNC_PROFILES에서 space_key/start_page_id 기본값 로드 - bin/convert_all.py: --sync-code 인자 추가, --pages-yaml 기본값을 var/pages.{sync_code}.yaml로 자동 도출 Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/convert_all.py | 10 ++++++++-- confluence-mdx/bin/fetch/processor.py | 3 +-- confluence-mdx/bin/fetch_cli.py | 22 ++++++++++++++++------ 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/confluence-mdx/bin/convert_all.py b/confluence-mdx/bin/convert_all.py index 95a447545..cb57c1387 100755 --- a/confluence-mdx/bin/convert_all.py +++ b/confluence-mdx/bin/convert_all.py 
@@ -162,8 +162,10 @@ def main(): parser = argparse.ArgumentParser( description='Batch convert all Confluence pages to MDX using pages.yaml' ) - parser.add_argument('--pages-yaml', default='var/pages.yaml', - help='Path to pages.yaml (default: var/pages.yaml)') + parser.add_argument('--sync-code', default='qm', + help='Sync profile code; used to auto-derive --pages-yaml (default: %(default)s)') + parser.add_argument('--pages-yaml', default=None, + help='Path to pages YAML (default: var/pages..yaml)') parser.add_argument('--var-dir', default='var', help='Directory containing page data (default: var)') parser.add_argument('--output-dir', default='target/ko', @@ -181,6 +183,10 @@ def main(): help='Log level for converter/cli.py (default: warning)') args = parser.parse_args() + # Auto-derive pages-yaml from sync-code if not explicitly provided + if args.pages_yaml is None: + args.pages_yaml = f'var/pages.{args.sync_code}.yaml' + # Resolve relative paths against project root (confluence-mdx/) args.pages_yaml = _resolve(args.pages_yaml) args.var_dir = _resolve(args.var_dir) diff --git a/confluence-mdx/bin/fetch/processor.py b/confluence-mdx/bin/fetch/processor.py index 7c8a52622..91d148aa0 100644 --- a/confluence-mdx/bin/fetch/processor.py +++ b/confluence-mdx/bin/fetch/processor.py @@ -175,8 +175,7 @@ def run(self) -> None: self.logger.info(f"Created output directory: {self.config.default_output_dir}") # Prepare output file path - output_yaml_path = os.path.join(self.config.default_output_dir, "pages.yaml") - output_list_path = os.path.join(self.config.default_output_dir, "list.txt") + output_yaml_path = os.path.join(self.config.default_output_dir, self.config.pages_yaml_filename) start_page_id = self.config.default_start_page_id diff --git a/confluence-mdx/bin/fetch_cli.py b/confluence-mdx/bin/fetch_cli.py index 66d89bfbd..3ac8e1ee7 100755 --- a/confluence-mdx/bin/fetch_cli.py +++ b/confluence-mdx/bin/fetch_cli.py @@ -40,6 +40,7 @@ from fetch.config import Config from 
fetch.processor import ConfluencePageProcessor +from fetch.sync_profiles import SYNC_PROFILES def main(): @@ -48,12 +49,15 @@ def main(): parser = argparse.ArgumentParser( description="Generate a list of pages from a Confluence space" ) - parser.add_argument("--space-key", default=Config().space_key, - help=f"Confluence space key (default: %(default)s)") + parser.add_argument("--sync-code", default="qm", + choices=list(SYNC_PROFILES.keys()), + help="Sync profile code (default: %(default)s)") + parser.add_argument("--space-key", default=None, + help="Confluence space key (overrides sync profile default)") parser.add_argument("--days", type=int, default=None, help="Number of days to look back for modified pages (default: auto-detect from .fetch_state.yaml, fallback: 21)") - parser.add_argument("--start-page-id", default=Config().default_start_page_id, - help="Root page ID for building breadcrumbs (default: %(default)s)") + parser.add_argument("--start-page-id", default=None, + help="Root page ID for building breadcrumbs (overrides sync profile default)") parser.add_argument("--base-url", default=Config().base_url, help="Confluence base URL (default: %(default)s)") parser.add_argument("--email", default=Config().email, help="Confluence email for authentication") parser.add_argument("--api-token", default=Config().api_token, help="Confluence API token for authentication") @@ -88,15 +92,21 @@ def main(): # Determine mode (default to "recent" if not specified) mode = args.mode if args.mode else "recent" + # Load sync profile and resolve space_key / start_page_id + profile = SYNC_PROFILES.get(args.sync_code) + space_key = args.space_key or (profile.space_key if profile else Config().space_key) + start_page_id = args.start_page_id or (profile.start_page_id if profile else Config().default_start_page_id) + # Create configuration config = Config( base_url=args.base_url, - space_key=args.space_key, + space_key=space_key, + sync_code=args.sync_code, days=args.days, 
email=args.email, api_token=args.api_token, default_output_dir=args.output_dir, - default_start_page_id=args.start_page_id, + default_start_page_id=start_page_id, download_attachments=args.attachments, mode=mode ) From 9ffa5ff25f93c94e1c4f1482c7f76bc73bd1ae7d Mon Sep 17 00:00:00 2001 From: JK Date: Wed, 18 Mar 2026 21:56:24 +0900 Subject: [PATCH 03/19] =?UTF-8?q?confluence-mdx:=20R4=20=E2=80=94=20list.t?= =?UTF-8?q?xt=20=EC=83=9D=EC=84=B1=20=EA=B8=B0=EB=8A=A5=EC=9D=84=20?= =?UTF-8?q?=EC=A0=9C=EA=B1=B0=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - bin/fetch/processor.py: list_lines 수집 및 list.txt 저장 로직 제거 - bin/convert_all.py: --generate-list 플래그 및 generate_list_files() 함수 제거 - .gitignore: /var/list.txt 항목 제거 - README.md: list.txt 관련 설명 제거, pages.<sync_code>.yaml 명칭으로 업데이트 Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/.gitignore | 1 - confluence-mdx/README.md | 16 +++++------ confluence-mdx/bin/convert_all.py | 38 ++------------------------- confluence-mdx/bin/fetch/processor.py | 30 +++------------------ 4 files changed, 12 insertions(+), 73 deletions(-) diff --git a/confluence-mdx/.gitignore b/confluence-mdx/.gitignore index e0fcd0c34..e817fc2a5 100644 --- a/confluence-mdx/.gitignore +++ b/confluence-mdx/.gitignore @@ -8,5 +8,4 @@ /bin/mdx_to_storage/__pycache__/ /tests/__pycache__/ /tests/test_mdx_to_storage/__pycache__/ -/var/list.txt /reports/ diff --git a/confluence-mdx/README.md b/confluence-mdx/README.md index d63df558a..69a6fb9c6 100644 --- a/confluence-mdx/README.md +++ b/confluence-mdx/README.md @@ -80,10 +80,10 @@ pip3 install requests beautifulsoup4 pyyaml 1. `confluence-mdx/var/`에 Confluence 문서 데이터를 저장합니다. - 개별 문서마다 `/page.xhtml`, `/page.v1.yaml` 등을 저장합니다. - - 전체 문서 목록을 `var/pages.yaml`에 저장합니다. + - 전체 문서 목록을 `var/pages.<sync_code>.yaml`에 저장합니다 (예: `var/pages.qm.yaml`). - `fetch_cli.py`를 사용합니다. 2. `src/content/ko/` 아래에 MDX 문서를 생성합니다. - - `var/pages.yaml`을 기반으로 모든 페이지를 변환합니다. 
+ - `var/pages.<sync_code>.yaml`을 기반으로 모든 페이지를 변환합니다. - `convert_all.py`를 사용합니다. 무작정 따라해 보기 @@ -130,8 +130,7 @@ bin/fetch_cli.py --attachments bin/fetch_cli.py --local # 로컬에서 fetch_cli.py 개선 과정에서, 반복실행할 때 사용하는 명령입니다. -# 또는, var/list.txt 를 업데이트하고자 하는 경우에 실행합니다. -bin/fetch_cli.py --local >var/list.txt +bin/fetch_cli.py --local # 특정 페이지 ID와 하위 문서를 내려받습니다. 첨부파일을 포함하여 내려받습니다. # 일부 문서만 변경한 경우, 해당 문서와 하위 페이지를 API 로 내려받아 저장할 때 사용합니다. @@ -156,7 +155,6 @@ bin/fetch_cli.py --log-level DEBUG 실행 결과: - `var/` 디렉토리에 문서 데이터가 저장됩니다. - 각 페이지 ID에 해당하는 디렉토리에 `page.yaml`과 `page.xhtml` 파일이 저장됩니다. -- `>list.txt`로 stdout 을 redirect 하면, `list.txt` 파일에 문서 목록이 저장됩니다. ### 2. 전체 변환 (convert_all.py) @@ -165,14 +163,14 @@ bin/fetch_cli.py --log-level DEBUG 실행 방법: ```bash -# 전체 변환 (번역 검증 포함) +# 전체 변환 (번역 검증 포함, 기본: --sync-code qm) bin/convert_all.py +# QCP Space 변환 +bin/convert_all.py --sync-code qcp + # 번역 검증만 수행 (변환하지 않음) bin/convert_all.py --verify-translations - -# 디버깅용 list.txt / list.en.txt 생성 (변환도 함께 수행) -bin/convert_all.py --generate-list ``` 실행 결과: diff --git a/confluence-mdx/bin/convert_all.py b/confluence-mdx/bin/convert_all.py index cb57c1387..97417ccc6 100755 --- a/confluence-mdx/bin/convert_all.py +++ b/confluence-mdx/bin/convert_all.py @@ -6,9 +6,9 @@ 하나의 명령으로 대체합니다. 
Usage: - bin/convert_all.py # 전체 변환 + bin/convert_all.py # 전체 변환 (기본: --sync-code qm) + bin/convert_all.py --sync-code qcp # QCP Space 변환 bin/convert_all.py --verify-translations # 번역 검증만 수행 - bin/convert_all.py --generate-list # list.txt / list.en.txt 생성 """ import argparse @@ -77,34 +77,6 @@ def verify_translations(pages: List[Dict], translations: Dict[str, str]) -> List return missing -def generate_list_files(pages: List[Dict], output_dir: str) -> None: - """Generate list.txt (Korean) and list.en.txt (English) from pages.yaml.""" - list_txt_lines = [] - list_en_lines = [] - - # Skip the root page (first entry, single breadcrumb) - root_page_id = pages[0]['page_id'] if pages else None - - for page in pages: - if page['page_id'] == root_page_id: - continue - breadcrumbs = page.get('breadcrumbs', []) - breadcrumbs_en = page.get('breadcrumbs_en', []) - list_txt_lines.append(f"{page['page_id']}\t{' />> '.join(breadcrumbs)}\n") - list_en_lines.append(f"{page['page_id']}\t{' />> '.join(breadcrumbs_en)}\n") - - list_txt_path = os.path.join(output_dir, 'list.txt') - list_en_path = os.path.join(output_dir, 'list.en.txt') - - with open(list_txt_path, 'w', encoding='utf-8') as f: - f.writelines(list_txt_lines) - print(f"Generated {list_txt_path} ({len(list_txt_lines)} entries)", file=sys.stderr) - - with open(list_en_path, 'w', encoding='utf-8') as f: - f.writelines(list_en_lines) - print(f"Generated {list_en_path} ({len(list_en_lines)} entries)", file=sys.stderr) - - def convert_all(pages: List[Dict], var_dir: str, output_base_dir: str, public_dir: str, log_level: str) -> int: """Run converter/cli.py for each page. 
Returns number of failures.""" @@ -176,8 +148,6 @@ def main(): help='Path to translations file') parser.add_argument('--verify-translations', action='store_true', help='Verify translation coverage and exit') - parser.add_argument('--generate-list', action='store_true', - help='Generate list.txt / list.en.txt for debugging') parser.add_argument('--log-level', default='warning', choices=['debug', 'info', 'warning', 'error', 'critical'], help='Log level for converter/cli.py (default: warning)') @@ -214,10 +184,6 @@ def main(): if args.verify_translations: sys.exit(0) - # --generate-list: generate list files - if args.generate_list: - generate_list_files(pages, args.var_dir) - # Run conversions failures = convert_all(pages, args.var_dir, args.output_dir, args.public_dir, args.log_level) diff --git a/confluence-mdx/bin/fetch/processor.py b/confluence-mdx/bin/fetch/processor.py index 91d148aa0..d9145ae96 100644 --- a/confluence-mdx/bin/fetch/processor.py +++ b/confluence-mdx/bin/fetch/processor.py @@ -231,9 +231,7 @@ def run(self) -> None: ) # Download each page through all 4 stages and output to stdout - # Store downloaded pages for list.txt self.logger.warning(f"Downloading {len(modified_pages)} recently modified pages") - downloaded_list_lines = [] skipped_count = 0 for entry in modified_pages: page_id = entry["id"] @@ -256,8 +254,6 @@ def run(self) -> None: # Output to stdout during download breadcrumbs_str = " />> ".join(page.breadcrumbs) if page.breadcrumbs else "" print(f"{page.page_id}\t{breadcrumbs_str}") - # Store for list.txt (only downloaded pages) - downloaded_list_lines.append(f"{page.page_id}\t{breadcrumbs_str}\n") except Exception as e: self.logger.error(f"Error downloading page ID {page_id}: {str(e)}") continue @@ -266,38 +262,25 @@ def run(self) -> None: self.logger.warning(f"Skipped {skipped_count} pages (already up-to-date)") # After downloading, process like local mode (hierarchical traversal from start_page_id) - # Generate pages.yaml and list.txt 
with full hierarchical tree (like --local mode) + # Generate pages.yaml with full hierarchical tree (like --local mode) # No stdout output in this phase (like --local mode) self.logger.warning(f"Processing page tree from start page ID {start_page_id} (local mode)") page_count = 0 yaml_entries = [] - list_lines = [] for page in self.fetch_page_tree_recursive(start_page_id, start_page_id, use_local=True): if page: - breadcrumbs_str = " />> ".join(page.breadcrumbs) if page.breadcrumbs else "" - # No stdout output in local mode - # Exclude start_page_id from list.txt (root page is not converted to MDX) - if page.page_id != start_page_id: - list_lines.append(f"{page.page_id}\t{breadcrumbs_str}\n") page_count += 1 yaml_entries.append(page.to_dict()) elif self.config.mode == "local": # --local mode: Process existing local files hierarchically from start_page_id - # No stdout output in local mode self.logger.warning(f"Local mode: Processing page tree from start page ID {start_page_id}") page_count = 0 yaml_entries = [] - list_lines = [] for page in self.fetch_page_tree_recursive(start_page_id, start_page_id, use_local=True): if page: - breadcrumbs_str = " />> ".join(page.breadcrumbs) if page.breadcrumbs else "" - # No stdout output in local mode - # Exclude start_page_id from list.txt (root page is not converted to MDX) - if page.page_id != start_page_id: - list_lines.append(f"{page.page_id}\t{breadcrumbs_str}\n") page_count += 1 yaml_entries.append(page.to_dict()) @@ -307,15 +290,13 @@ def run(self) -> None: self.logger.warning(f"Remote mode: Processing page tree from start page ID {start_page_id} via API") page_count = 0 yaml_entries = [] - list_lines = [] for page in self.fetch_page_tree_recursive(start_page_id, start_page_id, use_local=False): if page: - breadcrumbs_str = " />> ".join(page.breadcrumbs) if page.breadcrumbs else "" - # Exclude start_page_id from stdout and list.txt (root page is not converted to MDX) + # Exclude start_page_id from stdout (root page is 
not converted to MDX) if page.page_id != start_page_id: + breadcrumbs_str = " />> ".join(page.breadcrumbs) if page.breadcrumbs else "" print(f"{page.page_id}\t{breadcrumbs_str}") - list_lines.append(f"{page.page_id}\t{breadcrumbs_str}\n") page_count += 1 yaml_entries.append(page.to_dict()) @@ -347,11 +328,6 @@ def run(self) -> None: self.file_manager.save_yaml(output_yaml_path, yaml_entries) self.logger.info(f"YAML data saved to {output_yaml_path}") - # Save list.txt file - if list_lines: - self.file_manager.save_file(output_list_path, "".join(list_lines)) - self.logger.info(f"List file saved to {output_list_path}") - self.logger.info(f"Completed processing {page_count} pages") except Exception as e: self.logger.error(f"Error in main execution: {str(e)}") From ae1c307cb87cee350d4a33af9d527f2c5dd3c758 Mon Sep 17 00:00:00 2001 From: JK Date: Wed, 18 Mar 2026 21:57:01 +0900 Subject: [PATCH 04/19] =?UTF-8?q?confluence-mdx:=20R5=20=E2=80=94=20image?= =?UTF-8?q?=5Fstatus.py=EC=9D=98=20fetch=5Fstate=20=EB=A6=AC=ED=8F=AC?= =?UTF-8?q?=ED=8A=B8=EB=A5=BC=20Space=EB=B3=84=EB=A1=9C=20=EB=B6=84?= =?UTF-8?q?=EB=A6=AC=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - read_fetch_state() → read_fetch_states(): 모든 fetch_state.yaml을 순회하여 (root_id, state) 목록 반환 - format_report(): Fetch State를 Space별(code / root_id)로 구분 출력 - SYNC_PROFILES에서 root_id → code 매핑으로 사람이 읽기 쉬운 코드명 표시 Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/image_status.py | 37 ++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/confluence-mdx/bin/image_status.py b/confluence-mdx/bin/image_status.py index dff5c49c2..2e677b42c 100755 --- a/confluence-mdx/bin/image_status.py +++ b/confluence-mdx/bin/image_status.py @@ -20,6 +20,13 @@ import yaml +# Ensure bin/ is on sys.path for local package imports (fetch.sync_profiles) +_SCRIPT_DIR = Path(__file__).resolve().parent +if str(_SCRIPT_DIR) not in 
sys.path: + sys.path.insert(0, str(_SCRIPT_DIR)) + +from fetch.sync_profiles import SYNC_PROFILES + def read_build_date(workdir: Path) -> str: """Read image build date from .build-date file.""" @@ -29,12 +36,15 @@ def read_build_date(workdir: Path) -> str: return "unknown" -def read_fetch_state(var_dir: Path) -> dict: - """Find and read fetch_state.yaml.""" - for state_file in var_dir.glob("*/fetch_state.yaml"): +def read_fetch_states(var_dir: Path) -> list[tuple[str, dict]]: + """Find and read all fetch_state.yaml files, returning [(dir_name, state), ...].""" + states = [] + for state_file in sorted(var_dir.glob("*/fetch_state.yaml")): + dir_name = state_file.parent.name with open(state_file) as f: - return yaml.safe_load(f) or {} - return {} + state = yaml.safe_load(f) or {} + states.append((dir_name, state)) + return states def scan_pages(var_dir: Path) -> list[dict]: @@ -85,13 +95,16 @@ def format_report(workdir: Path, var_dir: Path, top_n: int) -> str: build_date = read_build_date(workdir) lines.append(f" Build Date : {build_date}") - # Fetch state - state = read_fetch_state(var_dir) - if state: - lines.append(f" Last Modified : {state.get('last_modified_seen', '?')}") - lines.append(f" Last Recent Fetch: {state.get('last_recent_fetch', '?')}") - lines.append(f" Last Full Fetch : {state.get('last_full_fetch', '?')}") - lines.append(f" Pages Fetched : {state.get('pages_fetched', '?')}") + # Fetch state (per space) + fetch_states = read_fetch_states(var_dir) + if fetch_states: + for root_id, state in fetch_states: + code = next((p.code for p in SYNC_PROFILES.values() if p.start_page_id == root_id), root_id) + lines.append(f" Fetch State [{code} / {root_id}]:") + lines.append(f" Last Modified : {state.get('last_modified_seen', '?')}") + lines.append(f" Last Recent Fetch: {state.get('last_recent_fetch', '?')}") + lines.append(f" Last Full Fetch : {state.get('last_full_fetch', '?')}") + lines.append(f" Pages Fetched : {state.get('pages_fetched', '?')}") else: 
lines.append(" Fetch State : not found") From e2056d979ed779504182c7f818dd99672964bfc6 Mon Sep 17 00:00:00 2001 From: JK Date: Wed, 18 Mar 2026 21:57:36 +0900 Subject: [PATCH 05/19] =?UTF-8?q?confluence-mdx:=20R6=20=E2=80=94=20Conflu?= =?UTF-8?q?ence=20folder=20content=20type=EC=9D=84=20=ED=8E=98=EC=9D=B4?= =?UTF-8?q?=EC=A7=80=20=ED=83=90=EC=83=89=EC=97=90=EC=84=9C=20=EC=98=AC?= =?UTF-8?q?=EB=B0=94=EB=A5=B4=EA=B2=8C=20=EC=B2=98=EB=A6=AC=ED=95=A9?= =?UTF-8?q?=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - api_client.py get_child_pages: folder ID일 때 /api/v2/folders/{id}/children 호출, type=page 필터 제거 - api_client.py get_page_data_v2: folder ID일 때 /api/v2/folders/{id} 호출로 분기 - stages.py Stage1Processor: 기존 page.v2.yaml의 type 필드를 읽어 content_type 결정 - stages.py _build_breadcrumbs: type 조건을 "page" → ("page", "folder")로 확장하여 folder 조상 포함 Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/fetch/api_client.py | 26 ++++++++++++++++++++------ confluence-mdx/bin/fetch/stages.py | 13 +++++++++---- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/confluence-mdx/bin/fetch/api_client.py b/confluence-mdx/bin/fetch/api_client.py index a253faacf..51967ca08 100644 --- a/confluence-mdx/bin/fetch/api_client.py +++ b/confluence-mdx/bin/fetch/api_client.py @@ -53,14 +53,28 @@ def get_page_data_v1(self, page_id: str) -> Optional[Dict]: url = f"{self.config.base_url}/rest/api/content/{page_id}?expand=title,ancestors,body.storage,body.view" return self.make_request(url, "V1 API page data") - def get_page_data_v2(self, page_id: str) -> Optional[Dict]: - """Get page data using V2 API""" - url = f"{self.config.base_url}/api/v2/pages/{page_id}?body-format=atlas_doc_format" + def get_page_data_v2(self, page_id: str, content_type: str = "page") -> Optional[Dict]: + """Get page data using V2 API. + + Uses /api/v2/folders/{id} for folder content type, /api/v2/pages/{id} otherwise. 
+ """ + if content_type == "folder": + url = f"{self.config.base_url}/api/v2/folders/{page_id}" + else: + url = f"{self.config.base_url}/api/v2/pages/{page_id}?body-format=atlas_doc_format" return self.make_request(url, "V2 API page data") - def get_child_pages(self, page_id: str) -> Optional[Dict]: - """Get child pages using V2 API""" - url = f"{self.config.base_url}/api/v2/pages/{page_id}/children?type=page&limit=100" + def get_child_pages(self, page_id: str, content_type: str = "page") -> Optional[Dict]: + """Get child pages using V2 API. + + Uses /api/v2/folders/{id}/children for folder content type, + /api/v2/pages/{id}/children for page content type. + The type=page filter is omitted so that folder children are also included. + """ + if content_type == "folder": + url = f"{self.config.base_url}/api/v2/folders/{page_id}/children?limit=100" + else: + url = f"{self.config.base_url}/api/v2/pages/{page_id}/children?limit=100" return self.make_request(url, "V2 API child pages") def get_attachments(self, page_id: str) -> Optional[Dict]: diff --git a/confluence-mdx/bin/fetch/stages.py b/confluence-mdx/bin/fetch/stages.py index 55068f384..79694200d 100644 --- a/confluence-mdx/bin/fetch/stages.py +++ b/confluence-mdx/bin/fetch/stages.py @@ -44,6 +44,11 @@ def process(self, page_id: str) -> None: directory = self.get_page_directory(page_id) self.file_manager.ensure_directory(directory) + # Determine content type from existing page.v2.yaml (for folder routing) + v2_path = os.path.join(self.get_page_directory(page_id), "page.v2.yaml") + existing_v2 = self.file_manager.load_yaml(v2_path) if os.path.exists(v2_path) else None + content_type = (existing_v2 or {}).get("type", "page") + api_operations = [ { 'operation': lambda: self.api_client.get_page_data_v1(page_id), @@ -51,12 +56,12 @@ def process(self, page_id: str) -> None: 'filename': "page.v1.yaml" }, { - 'operation': lambda: self.api_client.get_page_data_v2(page_id), + 'operation': lambda: 
self.api_client.get_page_data_v2(page_id, content_type), 'description': "V2 API page data", 'filename': "page.v2.yaml" }, { - 'operation': lambda: self.api_client.get_child_pages(page_id), + 'operation': lambda: self.api_client.get_child_pages(page_id, content_type), 'description': "V2 API child pages", 'filename': "children.v2.yaml" }, @@ -291,7 +296,7 @@ def _build_breadcrumbs( filtered_ancestors: List[str] = [] found_start_page = False for ancestor in ancestors: - if ancestor.get("type") == "page": + if ancestor.get("type") in ("page", "folder"): if ancestor["id"] == start_page_id: found_start_page = True continue @@ -304,7 +309,7 @@ def _build_breadcrumbs( else: # Include all ancestors ancestor_titles = [ - clean_text(ancestor["title"]) for ancestor in ancestors if ancestor.get("type") == "page" and "title" in ancestor + clean_text(ancestor["title"]) for ancestor in ancestors if ancestor.get("type") in ("page", "folder") and "title" in ancestor ] path = ancestor_titles + [title] From 40622a28ad88ebd7a57ac07464e6d323f00351ad Mon Sep 17 00:00:00 2001 From: JK Date: Wed, 18 Mar 2026 21:58:20 +0900 Subject: [PATCH 06/19] =?UTF-8?q?confluence-mdx:=20entrypoint.sh=EA=B3=BC?= =?UTF-8?q?=20compose.yml=EC=9D=84=20=EB=A9=80=ED=8B=B0=20Space=20?= =?UTF-8?q?=EC=A7=80=EC=9B=90=EC=9C=BC=EB=A1=9C=20=EC=97=85=EB=8D=B0?= =?UTF-8?q?=EC=9D=B4=ED=8A=B8=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - scripts/entrypoint.sh full 커맨드: --sync-code를 파싱하여 convert_all.py에 전달 - scripts/entrypoint.sh full-all 커맨드 추가: qm, qcp Space를 순차 실행 - scripts/entrypoint.sh help 텍스트 업데이트 - compose.yml: 볼륨 마운트를 pages.yaml → pages.qm.yaml로 변경 Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/compose.yml | 4 +-- confluence-mdx/scripts/entrypoint.sh | 38 +++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/confluence-mdx/compose.yml b/confluence-mdx/compose.yml index 84c48775d..c4c9e18b5 100644 
--- a/confluence-mdx/compose.yml +++ b/confluence-mdx/compose.yml @@ -49,8 +49,8 @@ services: volumes: # Use translation file from host - ./etc/korean-titles-translations.txt:/workdir/etc/korean-titles-translations.txt - # Mount files in var to host - - ./var/pages.yaml:/workdir/var/pages.yaml + # Mount pages YAML files in var to host (one per Space) + - ./var/pages.qm.yaml:/workdir/var/pages.qm.yaml # Mount output directories to host (matching symlink structure in target/) # target/ko -> ../../src/content/ko - ../src/content/ko:/workdir/target/ko diff --git a/confluence-mdx/scripts/entrypoint.sh b/confluence-mdx/scripts/entrypoint.sh index 9f430744f..f6cadb71d 100755 --- a/confluence-mdx/scripts/entrypoint.sh +++ b/confluence-mdx/scripts/entrypoint.sh @@ -38,14 +38,36 @@ case "${1:-help}" in echo "+ bin/$command $@" exec bin/$command "$@" ;; - full) # Execute full workflow + full) # Execute full workflow for a single Space print_image_info shift - echo "# Starting full workflow..." + # Extract --sync-code value from args (default: qm) + sync_code="qm" + prev_arg="" + for arg in "$@"; do + if [[ "$prev_arg" == "--sync-code" ]]; then + sync_code="$arg" + elif [[ "$arg" == "--sync-code="* ]]; then + sync_code="${arg#--sync-code=}" + fi + prev_arg="$arg" + done + echo "# Starting full workflow (sync-code: $sync_code)..." echo "+ bin/fetch_cli.py $@" bin/fetch_cli.py "$@" - echo "+ bin/convert_all.py" - bin/convert_all.py + echo "+ bin/convert_all.py --sync-code $sync_code" + bin/convert_all.py --sync-code "$sync_code" + ;; + full-all) # Execute full workflow for all Spaces + print_image_info + shift + for CODE in qm qcp; do + echo "# Starting full workflow for Space: $CODE..." 
+ echo "+ bin/fetch_cli.py --sync-code $CODE $@" + bin/fetch_cli.py --sync-code "$CODE" "$@" + echo "+ bin/convert_all.py --sync-code $CODE" + bin/convert_all.py --sync-code "$CODE" + done ;; status) # Show detailed var/ data status report exec bin/image_status.py "${@:2}" @@ -66,7 +88,8 @@ Usage: Commands: fetch_cli.py [args...] - Collect Confluence data convert_all.py [args...] - Convert all pages to MDX - full [fetch args...] - Execute full workflow (fetch + convert) + full [fetch args...] - Execute full workflow for a single Space (default: --sync-code qm) + full-all [fetch args...] - Execute full workflow for all Spaces (qm, qcp) sequentially converter/cli.py - Convert a single XHTML to MDX status - Show var/ data freshness report bash - Run interactive shell @@ -74,8 +97,9 @@ Commands: Examples: docker run docker.io/querypie/confluence-mdx:latest full - docker run docker.io/querypie/confluence-mdx:latest full --recent - docker run docker.io/querypie/confluence-mdx:latest convert_all.py + docker run docker.io/querypie/confluence-mdx:latest full --sync-code qm --recent + docker run docker.io/querypie/confluence-mdx:latest full-all + docker run docker.io/querypie/confluence-mdx:latest convert_all.py --sync-code qm docker run docker.io/querypie/confluence-mdx:latest fetch_cli.py --attachments docker run docker.io/querypie/confluence-mdx:latest status docker run -v \$(pwd)/target:/workdir/target docker.io/querypie/confluence-mdx:latest full --local From cdcd01448aa59b809d64bc9e4239b1dd212f34c6 Mon Sep 17 00:00:00 2001 From: JK Date: Wed, 18 Mar 2026 21:58:32 +0900 Subject: [PATCH 07/19] =?UTF-8?q?confluence-mdx:=20var/pages.yaml=20?= =?UTF-8?q?=E2=86=92=20var/pages.qm.yaml=20=EB=A7=88=EC=9D=B4=EA=B7=B8?= =?UTF-8?q?=EB=A0=88=EC=9D=B4=EC=85=98=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 이슈 #936 R1의 파일명 변경 규칙에 따라 기존 pages.yaml을 pages.qm.yaml로 이름을 변경합니다. 
Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/var/{pages.yaml => pages.qm.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename confluence-mdx/var/{pages.yaml => pages.qm.yaml} (100%) diff --git a/confluence-mdx/var/pages.yaml b/confluence-mdx/var/pages.qm.yaml similarity index 100% rename from confluence-mdx/var/pages.yaml rename to confluence-mdx/var/pages.qm.yaml From e47ee03fd28edde133db3b9a1272a9ab7ad4c6e9 Mon Sep 17 00:00:00 2001 From: JK Date: Wed, 18 Mar 2026 22:05:28 +0900 Subject: [PATCH 08/19] =?UTF-8?q?confluence-mdx:=20QCP=20Space=20=EB=A3=A8?= =?UTF-8?q?=ED=8A=B8=20=ED=8E=98=EC=9D=B4=EC=A7=80=20ID=EB=A5=BC=20?= =?UTF-8?q?=EA=B8=B0=EB=A1=9D=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sync_profiles.py의 qcp 프로필에 start_page_id를 채웁니다. 루트: https://querypie.atlassian.net/wiki/spaces/QCP/folder/887849063 Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/fetch/sync_profiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluence-mdx/bin/fetch/sync_profiles.py b/confluence-mdx/bin/fetch/sync_profiles.py index c7eadf876..4a7e27ec7 100644 --- a/confluence-mdx/bin/fetch/sync_profiles.py +++ b/confluence-mdx/bin/fetch/sync_profiles.py @@ -20,6 +20,6 @@ class SyncProfile: "qcp": SyncProfile( code="qcp", space_key="QCP", - start_page_id="", # TBD: QCP Space 루트 페이지 ID + start_page_id="887849063", # QCP Space 루트 (https://querypie.atlassian.net/wiki/spaces/QCP/folder/887849063) ), } From 1ca25b0ece6f8d34270fd51692a270c175f6be8f Mon Sep 17 00:00:00 2001 From: JK Date: Thu, 19 Mar 2026 13:29:09 +0900 Subject: [PATCH 09/19] =?UTF-8?q?confluence-mdx:=20reverse=5Fsync=5Fcli.py?= =?UTF-8?q?=EC=9D=98=20pages.yaml=20=EC=B0=B8=EC=A1=B0=EB=A5=BC=20pages.qm?= =?UTF-8?q?.yaml=EB=A1=9C=20=EC=97=85=EB=8D=B0=EC=9D=B4=ED=8A=B8=ED=95=A9?= =?UTF-8?q?=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R1 파일명 
변경(_resolve_page_id, _resolve_attachment_dir)에 따라 reverse_sync_cli.py와 테스트 코드에 남아 있던 pages.yaml 하드코딩을 수정합니다. Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/reverse_sync_cli.py | 14 +++++++------- confluence-mdx/tests/test_reverse_sync_cli.py | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/confluence-mdx/bin/reverse_sync_cli.py b/confluence-mdx/bin/reverse_sync_cli.py index d6234990e..5897c141c 100755 --- a/confluence-mdx/bin/reverse_sync_cli.py +++ b/confluence-mdx/bin/reverse_sync_cli.py @@ -101,26 +101,26 @@ def _get_changed_ko_mdx_files(branch: str) -> List[str]: def _resolve_page_id(ko_mdx_path: str) -> str: - """src/content/ko/...mdx 경로에서 pages.yaml을 이용해 page_id를 유도한다.""" + """src/content/ko/...mdx 경로에서 pages.qm.yaml을 이용해 page_id를 유도한다.""" rel = ko_mdx_path.removeprefix('src/content/ko/').removesuffix('.mdx') path_parts = rel.split('/') - pages_path = _PROJECT_DIR / 'var' / 'pages.yaml' + pages_path = _PROJECT_DIR / 'var' / 'pages.qm.yaml' if not pages_path.exists(): - raise ValueError("var/pages.yaml not found") + raise ValueError("var/pages.qm.yaml not found") pages = yaml.safe_load(pages_path.read_text()) for page in pages: if page.get('path') == path_parts: return page['page_id'] - raise ValueError(f"MDX path '{ko_mdx_path}' not found in var/pages.yaml") + raise ValueError(f"MDX path '{ko_mdx_path}' not found in var/pages.qm.yaml") def _resolve_attachment_dir(page_id: str) -> str: - """page_id에서 pages.yaml의 path를 조회하여 attachment-dir를 반환.""" - pages = yaml.safe_load((_PROJECT_DIR / 'var' / 'pages.yaml').read_text()) + """page_id에서 pages.qm.yaml의 path를 조회하여 attachment-dir를 반환.""" + pages = yaml.safe_load((_PROJECT_DIR / 'var' / 'pages.qm.yaml').read_text()) for page in pages: if page['page_id'] == page_id: return '/' + '/'.join(page['path']) - raise ValueError(f"page_id '{page_id}' not found in var/pages.yaml") + raise ValueError(f"page_id '{page_id}' not found in var/pages.qm.yaml") def _detect_language(descriptor: 
str) -> str: diff --git a/confluence-mdx/tests/test_reverse_sync_cli.py b/confluence-mdx/tests/test_reverse_sync_cli.py index 57f901644..32522ff7b 100644 --- a/confluence-mdx/tests/test_reverse_sync_cli.py +++ b/confluence-mdx/tests/test_reverse_sync_cli.py @@ -232,7 +232,7 @@ def test_extract_ko_mdx_path_invalid(): def test_resolve_page_id(tmp_path, monkeypatch): - """pages.yaml에서 MDX 경로로 page_id를 유도한다.""" + """pages.qm.yaml에서 MDX 경로로 page_id를 유도한다.""" import yaml monkeypatch.chdir(tmp_path) var_dir = tmp_path / "var" @@ -241,22 +241,22 @@ def test_resolve_page_id(tmp_path, monkeypatch): {'page_id': '544112828', 'path': ['user-manual', 'user-agent']}, {'page_id': '123456789', 'path': ['overview']}, ] - (var_dir / 'pages.yaml').write_text(yaml.dump(pages)) + (var_dir / 'pages.qm.yaml').write_text(yaml.dump(pages)) result = _resolve_page_id('src/content/ko/user-manual/user-agent.mdx') assert result == '544112828' def test_resolve_page_id_not_found(tmp_path, monkeypatch): - """pages.yaml에 없는 경로이면 ValueError를 발생시킨다.""" + """pages.qm.yaml에 없는 경로이면 ValueError를 발생시킨다.""" import yaml monkeypatch.chdir(tmp_path) var_dir = tmp_path / "var" var_dir.mkdir() pages = [{'page_id': '111', 'path': ['other']}] - (var_dir / 'pages.yaml').write_text(yaml.dump(pages)) + (var_dir / 'pages.qm.yaml').write_text(yaml.dump(pages)) - with pytest.raises(ValueError, match="not found in var/pages.yaml"): + with pytest.raises(ValueError, match="not found in var/pages.qm.yaml"): _resolve_page_id('src/content/ko/nonexistent/page.mdx') From 1f293bca28985a2ebac201cb35be99593ef1eb55 Mon Sep 17 00:00:00 2001 From: JK Date: Thu, 19 Mar 2026 13:34:26 +0900 Subject: [PATCH 10/19] =?UTF-8?q?confluence-mdx:=20run-tests.sh=EC=9D=98?= =?UTF-8?q?=20pages.yaml=20=EC=B0=B8=EC=A1=B0=EB=A5=BC=20pages.qm.yaml?= =?UTF-8?q?=EB=A1=9C=20=EC=97=85=EB=8D=B0=EC=9D=B4=ED=8A=B8=ED=95=A9?= =?UTF-8?q?=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
resolve_slug_path()가 var/pages.yaml을 하드코딩으로 읽어 --attachment-dir를 결정하는 로직을 pages.qm.yaml 우선 탐색 후 pages.yaml fallback으로 변경합니다. Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/tests/run-tests.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/confluence-mdx/tests/run-tests.sh b/confluence-mdx/tests/run-tests.sh index c3b17d84b..4c2f2dd9d 100755 --- a/confluence-mdx/tests/run-tests.sh +++ b/confluence-mdx/tests/run-tests.sh @@ -121,17 +121,25 @@ activate_venv() { source "${VENV_DIR}/bin/activate" } -# Resolve page_id → slug path from pages.yaml +# Resolve page_id → slug path from pages.qm.yaml (falls back to pages.yaml for compatibility) resolve_slug_path() { local page_id="$1" python3 -c " import sys, yaml -pages = yaml.safe_load(open('${VENV_DIR}/../var/pages.yaml')) +from pathlib import Path +var_dir = Path('${VENV_DIR}/../var') +pages_file = var_dir / 'pages.qm.yaml' +if not pages_file.exists(): + pages_file = var_dir / 'pages.yaml' +if not pages_file.exists(): + print(f'ERROR: pages.qm.yaml not found in {var_dir}', file=sys.stderr) + sys.exit(1) +pages = yaml.safe_load(pages_file.open()) for p in pages: if str(p.get('page_id', '')) == sys.argv[1]: print('/' + '/'.join(p['path'])) sys.exit(0) -print(f'ERROR: page_id {sys.argv[1]} not found in pages.yaml', file=sys.stderr) +print(f'ERROR: page_id {sys.argv[1]} not found in {pages_file.name}', file=sys.stderr) sys.exit(1) " "${page_id}" } From 9fd3b42fcf5099899ded413433ac14ead696f76f Mon Sep 17 00:00:00 2001 From: JK Date: Thu, 19 Mar 2026 13:47:20 +0900 Subject: [PATCH 11/19] =?UTF-8?q?confluence-mdx:=20converter/cli.py?= =?UTF-8?q?=EC=9D=98=20pages.yaml=20=ED=83=90=EC=83=89=EC=97=90=20pages.qm?= =?UTF-8?q?.yaml=20fallback=EC=9D=84=20=EC=B6=94=EA=B0=80=ED=95=A9?= =?UTF-8?q?=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pages.yaml이 pages.qm.yaml로 이름이 변경됨에 따라, converter가 내부 링크 해석을 위해 pages.yaml을 로드하는 경로를 
pages.qm.yaml 우선 탐색 후 pages.yaml fallback으로 변경합니다. 이를 통해 cross-reference 링크가 #link-error로 출력되는 문제를 해결합니다. Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/converter/cli.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/confluence-mdx/bin/converter/cli.py b/confluence-mdx/bin/converter/cli.py index 416b98458..3324546dd 100755 --- a/confluence-mdx/bin/converter/cli.py +++ b/confluence-mdx/bin/converter/cli.py @@ -176,8 +176,12 @@ def main(): # 원본 XHTML 보존 — sidecar mapping에서 사용 xhtml_original = html_content - # Load pages.yaml to get the current page's path - pages_yaml_path = os.path.join(input_dir, '..', 'pages.yaml') + # Load pages YAML to get the current page's path. + # Try pages.qm.yaml first (new naming), fall back to pages.yaml (legacy). + var_dir = os.path.join(input_dir, '..') + pages_yaml_path = os.path.join(var_dir, 'pages.qm.yaml') + if not os.path.exists(pages_yaml_path): + pages_yaml_path = os.path.join(var_dir, 'pages.yaml') load_pages_yaml(pages_yaml_path, PAGES_BY_TITLE, PAGES_BY_ID) # Load page.v1.yaml: --page-dir 우선, 없으면 input_dir에서 탐색 From f20190f431bb2d580ccf07a34f5d0a81252c0765 Mon Sep 17 00:00:00 2001 From: JK Date: Thu, 19 Mar 2026 15:18:12 +0900 Subject: [PATCH 12/19] =?UTF-8?q?chore:=20.codegraph/,=20confluence-mdx/do?= =?UTF-8?q?cs/superpowers/,=20confluence-mdx/public/=20=EB=A5=BC=20.gitign?= =?UTF-8?q?ore=20=EC=97=90=20=EC=B6=94=EA=B0=80=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - /.codegraph/ — CodeGraph가 자동 생성하는 시맨틱 인덱스 DB - confluence-mdx/docs/superpowers/ — Claude Superpowers가 자동 생성하는 스킬 문서 - confluence-mdx/public/ — 재변환 임시 출력 디렉토리 Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 3 +++ confluence-mdx/.gitignore | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/.gitignore b/.gitignore index 986be94ec..78dcd11e9 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,6 @@ next-env.d.ts # git worktrees .worktrees/ + +# 
CodeGraph semantic index (auto-generated) +/.codegraph/ diff --git a/confluence-mdx/.gitignore b/confluence-mdx/.gitignore index e817fc2a5..ab6bd9bd1 100644 --- a/confluence-mdx/.gitignore +++ b/confluence-mdx/.gitignore @@ -9,3 +9,9 @@ /tests/__pycache__/ /tests/test_mdx_to_storage/__pycache__/ /reports/ + +# Claude Superpowers auto-generated skill docs +/docs/superpowers/ + +# Temporary public output (reconverted pages, etc.) +/public/ From 9712421deca28c48d8e0bb94193915e5a15416d9 Mon Sep 17 00:00:00 2001 From: JK Date: Thu, 19 Mar 2026 19:11:22 +0900 Subject: [PATCH 13/19] =?UTF-8?q?chore:=20confluence-mdx/.gitignore=20?= =?UTF-8?q?=EC=97=90=EC=84=9C=20=EA=B0=9C=EC=9D=B8=20=EB=A1=9C=EC=BB=AC=20?= =?UTF-8?q?=ED=95=AD=EB=AA=A9=EC=9D=84=20=EC=A0=9C=EA=B1=B0=ED=95=A9?= =?UTF-8?q?=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docs/superpowers/, public/ 은 개인 로컬 도구가 생성하는 임시 디렉토리로, 팀 공용 .gitignore 대신 .git/info/exclude 로 이동합니다. Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/.gitignore | 6 ------ 1 file changed, 6 deletions(-) diff --git a/confluence-mdx/.gitignore b/confluence-mdx/.gitignore index ab6bd9bd1..e817fc2a5 100644 --- a/confluence-mdx/.gitignore +++ b/confluence-mdx/.gitignore @@ -9,9 +9,3 @@ /tests/__pycache__/ /tests/test_mdx_to_storage/__pycache__/ /reports/ - -# Claude Superpowers auto-generated skill docs -/docs/superpowers/ - -# Temporary public output (reconverted pages, etc.) 
-/public/ From c9a4bf014d3c00fdb9d9373626ceda8910b8696e Mon Sep 17 00:00:00 2001 From: JK Date: Thu, 19 Mar 2026 20:14:34 +0900 Subject: [PATCH 14/19] =?UTF-8?q?confluence-mdx:=20folder=20=EB=A3=A8?= =?UTF-8?q?=ED=8A=B8=20=ED=8E=98=EC=9D=B4=EC=A7=80=EC=9D=98=20=EC=B2=AB=20?= =?UTF-8?q?fetch=20=EC=8B=A4=ED=8C=A8=EB=A5=BC=20=EC=88=98=EC=A0=95?= =?UTF-8?q?=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SyncProfile에 root_content_type 필드를 추가하고, Stage 1이 page.v2.yaml이 없는 클린 환경에서 루트 페이지를 처리할 때 이 값을 사용하도록 합니다. QCP Space 루트(887849063)는 folder이므로 root_content_type="folder"로 설정합니다. 기존 동작: page.v2.yaml 없음 → content_type="page" (기본값) → /api/v2/pages/{folder_id} → 404 또는 빈 응답 → 첫 QCP fetch 실패 수정 후: page.v2.yaml 없고 루트 페이지이면 → root_content_type → /api/v2/folders/{id} → 정상 응답 Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/fetch/config.py | 7 +++++++ confluence-mdx/bin/fetch/stages.py | 13 +++++++++++-- confluence-mdx/bin/fetch/sync_profiles.py | 7 +++++++ confluence-mdx/bin/fetch_cli.py | 4 +++- 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/confluence-mdx/bin/fetch/config.py b/confluence-mdx/bin/fetch/config.py index 1fb98c813..d55beeca1 100644 --- a/confluence-mdx/bin/fetch/config.py +++ b/confluence-mdx/bin/fetch/config.py @@ -18,6 +18,13 @@ class Config: sync_code: str = "qm" # Sync profile code (see fetch/sync_profiles.py) days: Optional[int] = None # Number of days to look back (None = auto-detect from .fetch_state.yaml) default_start_page_id: str = "608501837" # Root Page ID of "QueryPie Docs" (for breadcrumbs) + root_content_type: str = "page" + """Confluence content type of the root page ('page' or 'folder'). + + Used by Stage 1 when page.v2.yaml does not yet exist (first run on a clean + environment), so the correct API endpoint is selected from the start. + Populated from SyncProfile.root_content_type in fetch_cli.py. 
+ """ quick_start_page_id: str = "544375784" # QueryPie Overview having less children default_output_dir: str = "var" cache_dir: str = "cache" diff --git a/confluence-mdx/bin/fetch/stages.py b/confluence-mdx/bin/fetch/stages.py index 79694200d..eb28a7fa9 100644 --- a/confluence-mdx/bin/fetch/stages.py +++ b/confluence-mdx/bin/fetch/stages.py @@ -44,10 +44,19 @@ def process(self, page_id: str) -> None: directory = self.get_page_directory(page_id) self.file_manager.ensure_directory(directory) - # Determine content type from existing page.v2.yaml (for folder routing) + # Determine content type for API routing: + # 1. Prefer the type stored in page.v2.yaml (present on re-runs). + # 2. Fall back to config.root_content_type when processing the root + # page on a clean environment (page.v2.yaml does not yet exist). + # 3. Default to "page" for all other pages without cached data. v2_path = os.path.join(self.get_page_directory(page_id), "page.v2.yaml") existing_v2 = self.file_manager.load_yaml(v2_path) if os.path.exists(v2_path) else None - content_type = (existing_v2 or {}).get("type", "page") + if existing_v2: + content_type = existing_v2.get("type", "page") + elif page_id == self.config.default_start_page_id: + content_type = self.config.root_content_type + else: + content_type = "page" api_operations = [ { diff --git a/confluence-mdx/bin/fetch/sync_profiles.py b/confluence-mdx/bin/fetch/sync_profiles.py index 4a7e27ec7..a345ffa5f 100644 --- a/confluence-mdx/bin/fetch/sync_profiles.py +++ b/confluence-mdx/bin/fetch/sync_profiles.py @@ -9,6 +9,12 @@ class SyncProfile: code: str space_key: str start_page_id: str + root_content_type: str = "page" + """Confluence content type of the root page ('page' or 'folder'). + + Used by Stage 1 when page.v2.yaml does not yet exist (e.g. first run on a + clean environment) so the correct API endpoint is called from the start. 
+ """ SYNC_PROFILES: dict[str, SyncProfile] = { @@ -21,5 +27,6 @@ class SyncProfile: code="qcp", space_key="QCP", start_page_id="887849063", # QCP Space 루트 (https://querypie.atlassian.net/wiki/spaces/QCP/folder/887849063) + root_content_type="folder", # 887849063 is a Confluence folder, not a page ), } diff --git a/confluence-mdx/bin/fetch_cli.py b/confluence-mdx/bin/fetch_cli.py index 3ac8e1ee7..0f103c49e 100755 --- a/confluence-mdx/bin/fetch_cli.py +++ b/confluence-mdx/bin/fetch_cli.py @@ -92,10 +92,11 @@ def main(): # Determine mode (default to "recent" if not specified) mode = args.mode if args.mode else "recent" - # Load sync profile and resolve space_key / start_page_id + # Load sync profile and resolve space_key / start_page_id / root_content_type profile = SYNC_PROFILES.get(args.sync_code) space_key = args.space_key or (profile.space_key if profile else Config().space_key) start_page_id = args.start_page_id or (profile.start_page_id if profile else Config().default_start_page_id) + root_content_type = profile.root_content_type if profile else "page" # Create configuration config = Config( @@ -107,6 +108,7 @@ def main(): api_token=args.api_token, default_output_dir=args.output_dir, default_start_page_id=start_page_id, + root_content_type=root_content_type, download_attachments=args.attachments, mode=mode ) From 2af1dbb30785a2a15bc86c1df87bfa761ec4c207 Mon Sep 17 00:00:00 2001 From: JK Date: Thu, 19 Mar 2026 20:14:42 +0900 Subject: [PATCH 15/19] =?UTF-8?q?confluence-mdx:=20converter=20subprocess?= =?UTF-8?q?=EC=97=90=20pages-yaml=20=EA=B2=BD=EB=A1=9C=EB=A5=BC=20?= =?UTF-8?q?=EC=A0=84=EB=8B=AC=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit convert_all.py가 --sync-code qcp로 pages.qcp.yaml을 읽지만 하위 converter subprocess에 이 경로를 전달하지 않아 converter/cli.py가 항상 pages.qm.yaml을 로드하는 버그를 수정합니다. - converter/cli.py에 --pages-yaml 옵션을 추가합니다. - convert_all.py가 pages_yaml 경로를 cmd에 포함하여 전달합니다. 
- test_reverse_sync_e2e.py fixture가 pages.qm.yaml을 복사하도록 수정합니다. Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/convert_all.py | 7 +++++-- confluence-mdx/bin/converter/cli.py | 15 ++++++++++----- confluence-mdx/tests/test_reverse_sync_e2e.py | 11 +++++++---- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/confluence-mdx/bin/convert_all.py b/confluence-mdx/bin/convert_all.py index 97417ccc6..753c436a2 100755 --- a/confluence-mdx/bin/convert_all.py +++ b/confluence-mdx/bin/convert_all.py @@ -78,7 +78,7 @@ def verify_translations(pages: List[Dict], translations: Dict[str, str]) -> List def convert_all(pages: List[Dict], var_dir: str, output_base_dir: str, public_dir: str, - log_level: str) -> int: + log_level: str, pages_yaml: str = '') -> int: """Run converter/cli.py for each page. Returns number of failures.""" # Skip the root page root_page_id = pages[0]['page_id'] if pages else None @@ -120,6 +120,8 @@ def convert_all(pages: List[Dict], var_dir: str, output_base_dir: str, public_di f'--attachment-dir={attachment_dir}', f'--log-level={log_level}', ] + if pages_yaml: + cmd.append(f'--pages-yaml={pages_yaml}') print(f"[{i}/{total}] {page_id} → {output_file}", file=sys.stderr) result = subprocess.run(cmd, capture_output=True, text=True) @@ -185,7 +187,8 @@ def main(): sys.exit(0) # Run conversions - failures = convert_all(pages, args.var_dir, args.output_dir, args.public_dir, args.log_level) + failures = convert_all(pages, args.var_dir, args.output_dir, args.public_dir, args.log_level, + pages_yaml=args.pages_yaml) if failures: print(f"\nCompleted with {failures} failure(s) out of {len(pages)} pages", file=sys.stderr) diff --git a/confluence-mdx/bin/converter/cli.py b/confluence-mdx/bin/converter/cli.py index 3324546dd..5586722b0 100755 --- a/confluence-mdx/bin/converter/cli.py +++ b/confluence-mdx/bin/converter/cli.py @@ -123,6 +123,8 @@ def main(): parser.add_argument('--language', choices=['ko', 'ja', 'en'], help='언어 코드를 명시적으로 지정 (미지정 시 출력 
경로에서 자동 감지)') + parser.add_argument('--pages-yaml', + help='pages..yaml 경로 (미지정 시 input_dir/../pages.qm.yaml → pages.yaml 순으로 탐색)') parser.add_argument('--page-dir', help='page.v1.yaml 등 페이지 데이터 디렉토리 (기본: input 파일의 디렉토리)') parser.add_argument('--log-level', @@ -176,12 +178,15 @@ def main(): # 원본 XHTML 보존 — sidecar mapping에서 사용 xhtml_original = html_content - # Load pages YAML to get the current page's path. - # Try pages.qm.yaml first (new naming), fall back to pages.yaml (legacy). + # Load pages YAML for internal link resolution and _meta.ts generation. + # Priority: --pages-yaml arg > pages.qm.yaml (new naming) > pages.yaml (legacy). var_dir = os.path.join(input_dir, '..') - pages_yaml_path = os.path.join(var_dir, 'pages.qm.yaml') - if not os.path.exists(pages_yaml_path): - pages_yaml_path = os.path.join(var_dir, 'pages.yaml') + if args.pages_yaml: + pages_yaml_path = args.pages_yaml + else: + pages_yaml_path = os.path.join(var_dir, 'pages.qm.yaml') + if not os.path.exists(pages_yaml_path): + pages_yaml_path = os.path.join(var_dir, 'pages.yaml') load_pages_yaml(pages_yaml_path, PAGES_BY_TITLE, PAGES_BY_ID) # Load page.v1.yaml: --page-dir 우선, 없으면 input_dir에서 탐색 diff --git a/confluence-mdx/tests/test_reverse_sync_e2e.py b/confluence-mdx/tests/test_reverse_sync_e2e.py index ad730b3c9..0c24af4bb 100644 --- a/confluence-mdx/tests/test_reverse_sync_e2e.py +++ b/confluence-mdx/tests/test_reverse_sync_e2e.py @@ -124,10 +124,13 @@ def setup_var_793608206(self, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) dest = tmp_path / "var" / "793608206" shutil.copytree(VAR_DIR, dest) - # pages.yaml도 복사 (converter가 {input_dir}/../pages.yaml 을 참조) - pages_yaml = VAR_DIR.parent / "pages.yaml" - if pages_yaml.exists(): - shutil.copy2(pages_yaml, tmp_path / "var" / "pages.yaml") + # pages..yaml 복사 (converter가 {input_dir}/../pages.qm.yaml 을 참조) + # pages.qm.yaml 우선, 없으면 레거시 pages.yaml fallback + pages_yaml_src = VAR_DIR.parent / "pages.qm.yaml" + if not pages_yaml_src.exists(): + 
pages_yaml_src = VAR_DIR.parent / "pages.yaml" + if pages_yaml_src.exists(): + shutil.copy2(pages_yaml_src, tmp_path / "var" / "pages.qm.yaml") # _PROJECT_DIR을 tmp_path로 패치하여 run_verify가 tmp_path/var/ 를 사용하도록 함 import reverse_sync_cli monkeypatch.setattr(reverse_sync_cli, '_PROJECT_DIR', tmp_path) From da8535e86cb11763395024dee621a9d0c9869b7c Mon Sep 17 00:00:00 2001 From: JK Date: Thu, 19 Mar 2026 20:14:49 +0900 Subject: [PATCH 16/19] =?UTF-8?q?confluence-mdx:=20compose.yml=EC=97=90=20?= =?UTF-8?q?pages.qcp.yaml=20bind=20mount=EB=A5=BC=20=EC=B6=94=EA=B0=80?= =?UTF-8?q?=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit full-all 실행 시 컨테이너 안에서 생성된 pages.qcp.yaml이 호스트에 남지 않는 문제를 수정합니다. pages.qcp.yaml을 호스트와 bind mount하여 컨테이너 종료 후에도 QCP catalog를 호스트 측 도구에서 참조할 수 있도록 합니다. 빈 pages.qcp.yaml placeholder를 추가하여 Docker가 bind mount 대상이 없을 때 빈 디렉토리를 생성하는 동작을 방지합니다. Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/compose.yml | 6 +++++- confluence-mdx/var/pages.qcp.yaml | 0 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 confluence-mdx/var/pages.qcp.yaml diff --git a/confluence-mdx/compose.yml b/confluence-mdx/compose.yml index c4c9e18b5..55d8acc47 100644 --- a/confluence-mdx/compose.yml +++ b/confluence-mdx/compose.yml @@ -49,8 +49,12 @@ services: volumes: # Use translation file from host - ./etc/korean-titles-translations.txt:/workdir/etc/korean-titles-translations.txt - # Mount pages YAML files in var to host (one per Space) + # Mount pages YAML files in var to host (one per Space). + # These files are created/updated by fetch_cli.py and must persist on the + # host so that subsequent runs and host-side tools (convert_all.py, etc.) + # can read the catalog. Add a new line here for each new Space. 
- ./var/pages.qm.yaml:/workdir/var/pages.qm.yaml + - ./var/pages.qcp.yaml:/workdir/var/pages.qcp.yaml # Mount output directories to host (matching symlink structure in target/) # target/ko -> ../../src/content/ko - ../src/content/ko:/workdir/target/ko diff --git a/confluence-mdx/var/pages.qcp.yaml b/confluence-mdx/var/pages.qcp.yaml new file mode 100644 index 000000000..e69de29bb From 3ef51f27812bd7977c5ae640473abf81d4745805 Mon Sep 17 00:00:00 2001 From: JK Date: Thu, 19 Mar 2026 23:35:05 +0900 Subject: [PATCH 17/19] =?UTF-8?q?confluence-mdx:=20unused=5Fattachments.py?= =?UTF-8?q?,=20link=5Fresolver.py=EB=A5=BC=20sync=5Fprofiles=20=EA=B8=B0?= =?UTF-8?q?=EB=B0=98=EC=9C=BC=EB=A1=9C=20=EC=88=98=EC=A0=95=ED=95=A9?= =?UTF-8?q?=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pages.yaml 하드코딩을 pages..yaml로 교체합니다. unused_attachments.py: - SYNC_PROFILES import 추가 - --sync-code 인자 추가 (choices=SYNC_PROFILES.keys(), 기본값 qm) - load_pages_yaml이 pages..yaml을 로드하고 pages.yaml을 fallback으로 사용 - find_unused_attachments에 sync_code 파라미터 전파 link_resolver.py: - SYNC_PROFILES import 추가 - LinkResolver 기본 경로를 pages..yaml로 변경하고 pages.yaml fallback 유지 Co-Authored-By: Claude Sonnet 4.6 --- .../bin/mdx_to_storage/link_resolver.py | 14 +++++++- confluence-mdx/bin/unused_attachments.py | 36 ++++++++++++++----- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/confluence-mdx/bin/mdx_to_storage/link_resolver.py b/confluence-mdx/bin/mdx_to_storage/link_resolver.py index 8c520f501..24b2cf893 100644 --- a/confluence-mdx/bin/mdx_to_storage/link_resolver.py +++ b/confluence-mdx/bin/mdx_to_storage/link_resolver.py @@ -5,12 +5,20 @@ from dataclasses import dataclass, field import posixpath import re +import sys from pathlib import Path from typing import Any, Optional from urllib.parse import unquote import yaml +# Ensure bin/ is on sys.path for fetch package imports +_BIN_DIR = Path(__file__).resolve().parent.parent # 
confluence-mdx/bin/ +if str(_BIN_DIR) not in sys.path: + sys.path.insert(0, str(_BIN_DIR)) + +from fetch.sync_profiles import SYNC_PROFILES + _EXTERNAL_SCHEME_RE = re.compile(r"^[a-zA-Z][a-zA-Z0-9+.-]*:") @@ -55,7 +63,11 @@ class LinkResolver: def __init__(self, pages: Optional[list[PageEntry] | Path] = None) -> None: if pages is None: - pages = Path(__file__).resolve().parents[2] / "var" / "pages.yaml" + var_dir = Path(__file__).resolve().parents[2] / "var" + default_code = next(iter(SYNC_PROFILES), "qm") + pages = var_dir / f"pages.{default_code}.yaml" + if not pages.exists(): + pages = var_dir / "pages.yaml" if isinstance(pages, Path): pages = load_pages_yaml(pages) diff --git a/confluence-mdx/bin/unused_attachments.py b/confluence-mdx/bin/unused_attachments.py index 3f4fdaa97..d8751fb58 100755 --- a/confluence-mdx/bin/unused_attachments.py +++ b/confluence-mdx/bin/unused_attachments.py @@ -26,7 +26,14 @@ import yaml # Resolve project root (confluence-mdx/) from bin/unused_attachments.py -_PROJECT_DIR = Path(__file__).resolve().parent.parent # confluence-mdx/ +_BIN_DIR = Path(__file__).resolve().parent # confluence-mdx/bin/ +_PROJECT_DIR = _BIN_DIR.parent # confluence-mdx/ + +# Ensure bin/ is on sys.path for fetch package imports +if str(_BIN_DIR) not in sys.path: + sys.path.insert(0, str(_BIN_DIR)) + +from fetch.sync_profiles import SYNC_PROFILES def normalize_filename(name: str) -> str: @@ -34,9 +41,14 @@ def normalize_filename(name: str) -> str: return unicodedata.normalize('NFC', name) -def load_pages_yaml(var_dir: Path) -> list[dict]: - """var/pages.yaml에서 전체 페이지 목록을 로드합니다.""" - pages_file = var_dir / "pages.yaml" +def load_pages_yaml(var_dir: Path, sync_code: str = "qm") -> list[dict]: + """var/pages..yaml에서 전체 페이지 목록을 로드합니다. + + pages..yaml이 없으면 레거시 pages.yaml을 fallback으로 사용합니다. 
+ """ + pages_file = var_dir / f"pages.{sync_code}.yaml" + if not pages_file.exists(): + pages_file = var_dir / "pages.yaml" if not pages_file.exists(): return [] with open(pages_file, encoding='utf-8') as f: @@ -122,7 +134,8 @@ def build_cross_reference_index(references: dict[str, set[str]], def find_unused_attachments(var_dir: Path, page_ids: Optional[list[str]] = None, - logger: Optional[logging.Logger] = None) -> list[dict]: + logger: Optional[logging.Logger] = None, + sync_code: str = "qm") -> list[dict]: """미사용 첨부파일을 검출합니다. Returns: @@ -133,7 +146,7 @@ def find_unused_attachments(var_dir: Path, # 전체 페이지 목록 결정 if page_ids is None: - pages = load_pages_yaml(var_dir) + pages = load_pages_yaml(var_dir, sync_code) all_page_ids = [p["page_id"] for p in pages] else: all_page_ids = page_ids @@ -261,7 +274,12 @@ def delete_attachments(unused: list[dict], config, logger: logging.Logger) -> tu def main(): parser = argparse.ArgumentParser( - description="Confluence QM Space 첨부파일 사용 여부 검사 및 삭제" + description="Confluence Space 첨부파일 사용 여부 검사 및 삭제" + ) + parser.add_argument( + "--sync-code", default="qm", + choices=list(SYNC_PROFILES.keys()), + help="Sync profile code; pages..yaml을 로드합니다 (기본: %(default)s)" ) parser.add_argument( "--var-dir", default=None, @@ -306,11 +324,11 @@ def main(): page_ids = [pid.strip() for pid in args.page_id.split(",")] # 미사용 첨부파일 검출 - unused = find_unused_attachments(var_dir, page_ids, logger) + unused = find_unused_attachments(var_dir, page_ids, logger, sync_code=args.sync_code) # 전체 첨부파일 수 계산 (보고용) if page_ids is None: - pages = load_pages_yaml(var_dir) + pages = load_pages_yaml(var_dir, args.sync_code) all_page_ids = [p["page_id"] for p in pages] else: all_page_ids = page_ids From 1c081f9110a3e87470340f25be547dc8e7919593 Mon Sep 17 00:00:00 2001 From: JK Date: Thu, 19 Mar 2026 23:46:04 +0900 Subject: [PATCH 18/19] =?UTF-8?q?confluence-mdx:=20find=5Fmdx=5Fwith=5Ftex?= =?UTF-8?q?t.py=EC=99=80=20README.md=EB=A5=BC=20pages..yaml=20?= 
=?UTF-8?q?=EA=B7=9C=EC=B9=99=EC=97=90=20=EB=A7=9E=EA=B2=8C=20=EC=88=98?= =?UTF-8?q?=EC=A0=95=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit find_mdx_with_text.py: - --sync-code 인자 추가 (choices=SYNC_PROFILES.keys(), 기본값 qm) - --pages-yaml 기본값을 None으로 변경하고, sync_code로부터 자동 파생 (pages..yaml 우선, pages.yaml fallback) README.md: - convert_all.py 설명의 var/pages.yaml → var/pages..yaml Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/README.md | 2 +- confluence-mdx/bin/find_mdx_with_text.py | 25 +++++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/confluence-mdx/README.md b/confluence-mdx/README.md index 69a6fb9c6..9d1c7c221 100644 --- a/confluence-mdx/README.md +++ b/confluence-mdx/README.md @@ -158,7 +158,7 @@ bin/fetch_cli.py --log-level DEBUG ### 2. 전체 변환 (convert_all.py) -`convert_all.py`는 `var/pages.yaml`을 기반으로 모든 페이지를 MDX로 변환하는 스크립트입니다. +`convert_all.py`는 `var/pages..yaml`을 기반으로 모든 페이지를 MDX로 변환하는 스크립트입니다. 변환 전에 번역 누락을 자동 검증합니다. 
실행 방법: diff --git a/confluence-mdx/bin/find_mdx_with_text.py b/confluence-mdx/bin/find_mdx_with_text.py index 2d25e45ae..25394a10e 100755 --- a/confluence-mdx/bin/find_mdx_with_text.py +++ b/confluence-mdx/bin/find_mdx_with_text.py @@ -24,6 +24,13 @@ import yaml +# Ensure bin/ is on sys.path for fetch package imports +_BIN_DIR = Path(__file__).resolve().parent # confluence-mdx/bin/ +if str(_BIN_DIR) not in sys.path: + sys.path.insert(0, str(_BIN_DIR)) + +from fetch.sync_profiles import SYNC_PROFILES + # Configure logging logging.basicConfig( level=logging.INFO, @@ -194,11 +201,17 @@ def main(): default='src/content/ko', help='Content directory to search (default: src/content/ko)' ) + parser.add_argument( + '--sync-code', + default='qm', + choices=list(SYNC_PROFILES.keys()), + help='Sync profile code; pages..yaml을 로드합니다 (기본: %(default)s)' + ) parser.add_argument( '--pages-yaml', type=str, - default='var/pages.yaml', - help='Path to pages.yaml file (default: var/pages.yaml)' + default=None, + help='Path to pages YAML file (기본: var/pages..yaml)' ) parser.add_argument( '--workspace-root', @@ -218,7 +231,13 @@ def main(): # Resolve paths content_dir = workspace_root / args.content_dir - pages_yaml_path = workspace_root / 'confluence-mdx' / args.pages_yaml + confluence_mdx_dir = workspace_root / 'confluence-mdx' + if args.pages_yaml: + pages_yaml_path = confluence_mdx_dir / args.pages_yaml + else: + pages_yaml_path = confluence_mdx_dir / f'var/pages.{args.sync_code}.yaml' + if not pages_yaml_path.exists(): + pages_yaml_path = confluence_mdx_dir / 'var/pages.yaml' logging.info(f"Searching for text: '{args.search_text}'") logging.info(f"Content directory: {content_dir}") From d93702f76a96f81a97346a06533cee590209f765 Mon Sep 17 00:00:00 2001 From: JK Date: Fri, 20 Mar 2026 01:34:11 +0900 Subject: [PATCH 19/19] =?UTF-8?q?confluence-mdx:=20find=5Fmdx=5Fwith=5Ftex?= =?UTF-8?q?t.py=20Confluence=20=EB=A7=81=ED=81=AC=EB=A5=BC=20sync=5Fcode?= 
=?UTF-8?q?=EC=9D=98=20space=5Fkey=20=EA=B8=B0=EB=B0=98=EC=9C=BC=EB=A1=9C?= =?UTF-8?q?=20=EC=83=9D=EC=84=B1=ED=95=A9=EB=8B=88=EB=8B=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --sync-code qcp 사용 시 결과 링크가 spaces/QM/pages로 고정되는 버그를 수정합니다. SYNC_PROFILES[sync_code].space_key에서 space_key를 파생하여 spaces/{SPACE_KEY}/pages/{page_id} 형식으로 링크를 생성합니다. Co-Authored-By: Claude Sonnet 4.6 --- confluence-mdx/bin/find_mdx_with_text.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/confluence-mdx/bin/find_mdx_with_text.py b/confluence-mdx/bin/find_mdx_with_text.py index 25394a10e..24e1ba21c 100755 --- a/confluence-mdx/bin/find_mdx_with_text.py +++ b/confluence-mdx/bin/find_mdx_with_text.py @@ -37,8 +37,7 @@ format='%(levelname)s: %(message)s' ) -# Confluence base URL -CONFLUENCE_BASE_URL = "https://querypie.atlassian.net/wiki/spaces/QM/pages" +CONFLUENCE_BASE = "https://querypie.atlassian.net/wiki/spaces" def find_mdx_files_with_text(content_dir: Path, search_text: str) -> List[Path]: @@ -172,17 +171,9 @@ def find_page_by_path(pages_by_path: Dict, mdx_path: List[str]) -> Optional[Dict return None -def generate_confluence_link(page_id: str) -> str: - """ - Generate Confluence document link - - Args: - page_id: Confluence page ID - - Returns: - Confluence URL - """ - return f"{CONFLUENCE_BASE_URL}/{page_id}" +def generate_confluence_link(page_id: str, space_key: str) -> str: + """Generate Confluence document link for the given space.""" + return f"{CONFLUENCE_BASE}/{space_key}/pages/{page_id}" def main(): @@ -259,6 +250,9 @@ def main(): logging.error("No pages loaded from pages.yaml. 
Cannot generate links.") return 1 + # Derive space_key from sync profile + space_key = SYNC_PROFILES[args.sync_code].space_key + # Find matching pages and generate links results = [] for mdx_file in matching_files: @@ -269,7 +263,7 @@ def main(): page_id = page_info.get('page_id') title = page_info.get('title', 'Unknown') title_orig = page_info.get('title_orig', 'Unknown') - confluence_link = generate_confluence_link(page_id) + confluence_link = generate_confluence_link(page_id, space_key) results.append({ 'mdx_file': mdx_file.relative_to(workspace_root),