Handle malformed article URLs

2025-11-27 21:18:43 +08:00
parent 7f75b44af1
commit 061474c574
6 changed files with 3030 additions and 19 deletions
--- a/utils/url.test.ts
+++ b/utils/url.test.ts
@@ -0,0 +1,28 @@
+import { describe, expect, it } from 'vitest';
+import { getDisplayUrl, normalizeUrl } from './url';
+
// normalizeUrl: input that lacks a scheme must come back prefixed with https://.
describe('normalizeUrl', () => {
  it('adds https protocol when missing', () => {
    const schemeless = 'example.com/page';
    const normalized = normalizeUrl(schemeless);

    expect(normalized).toBe('https://example.com/page');
  });
});
+
// getDisplayUrl: checks the { href, hostname } pair for a fully qualified URL,
// a scheme-less URL, and a string that cannot be parsed as a URL at all.
describe('getDisplayUrl', () => {
  it('returns hostname and normalized href for valid URLs', () => {
    const { hostname, href } = getDisplayUrl('https://example.com/path');

    expect(hostname).toBe('example.com');
    expect(href).toBe('https://example.com/path');
  });

  it('normalizes URLs without protocol for display', () => {
    const { hostname, href } = getDisplayUrl('example.com/path');

    expect(hostname).toBe('example.com');
    expect(href).toBe('https://example.com/path');
  });

  it('falls back to raw URL when parsing fails', () => {
    // The raw text is expected back unchanged in both fields.
    const unparseable = 'not a url';
    const { hostname, href } = getDisplayUrl(unparseable);

    expect(hostname).toBe(unparseable);
    expect(href).toBe(unparseable);
  });
});
--- a/utils/url.ts
+++ b/utils/url.ts
@@ -0,0 +1,23 @@
/**
 * Matches an explicit `scheme://` prefix. Scheme syntax follows RFC 3986
 * (a letter followed by letters, digits, `+`, `.` or `-`) and is
 * case-insensitive, so `HTTP://` and `https://` are both recognised.
 */
const EXPLICIT_SCHEME_PREFIX = /^[a-z][a-z\d+.-]*:\/\//i;

/**
 * Ensures a URL string carries an explicit scheme, defaulting to `https`.
 *
 * Rules, applied in order after trimming surrounding whitespace:
 * - empty input is returned as `''` instead of the meaningless `https://`;
 * - input that already starts with `scheme://` (any case, any scheme) is
 *   returned as-is so it is never double-prefixed;
 * - protocol-relative input (`//host/path`) gets `https:` prepended;
 * - anything else gets `https://` prepended.
 *
 * @param url - Raw URL text, possibly without a scheme.
 * @returns The trimmed URL with a scheme, or `''` for blank input.
 */
export const normalizeUrl = (url: string): string => {
  const cleanUrl = url.trim();

  if (cleanUrl === '') {
    return cleanUrl;
  }
  if (EXPLICIT_SCHEME_PREFIX.test(cleanUrl)) {
    return cleanUrl;
  }
  if (cleanUrl.startsWith('//')) {
    // Protocol-relative: only the scheme itself is missing.
    return `https:${cleanUrl}`;
  }
  // `host:port/path` has no `://`, so it correctly lands here as well.
  return `https://${cleanUrl}`;
};
+
/**
 * Derives a link target (`href`) and a short display label (`hostname`) from
 * a possibly malformed URL string. Never throws.
 *
 * Two candidates are tried in order: the output of `normalizeUrl(url)`, then
 * the trimmed input itself. The first candidate that `new URL()` accepts and
 * that has a non-empty hostname wins; its text becomes `href`. If neither
 * qualifies, the trimmed input is returned for both fields so the caller
 * still has something to show.
 *
 * NOTE(review): `href` is not restricted to http(s). Host-less schemes such
 * as `javascript:` or `data:` are returned verbatim by the final fallback, so
 * a caller that renders `href` as a clickable link should validate the scheme
 * itself.
 *
 * @param url - Raw URL text as stored or entered.
 * @returns `href` to link to and `hostname` to display.
 */
export const getDisplayUrl = (url: string): { href: string; hostname: string } => {
  const raw = url.trim();

  for (const candidate of [normalizeUrl(url), raw]) {
    try {
      const { hostname } = new URL(candidate);
      // An empty hostname (e.g. `mailto:`, `data:`) would render as a blank
      // label, so treat it like a parse failure and keep looking.
      if (hostname !== '') {
        return { href: candidate, hostname };
      }
    } catch {
      // Not parseable in this form; fall through to the next candidate.
    }
  }

  return { href: raw, hostname: raw };
};