feat: scraping, database and tooling
This commit is contained in:
11
.env.example
Normal file
11
.env.example
Normal file
@@ -0,0 +1,11 @@
|
||||
DATABASE_URL=
|
||||
POSTGRES_USER=
|
||||
POSTGRES_PASSWORD=
|
||||
POSTGRES_DB=
|
||||
WEBSITE_URL=
|
||||
ANCHOR_ELEMENT=
|
||||
ITERATIVE_CLASS=
|
||||
NAME_CLASS=
|
||||
AREAS_CLASS=
|
||||
URL_CLASS=
|
||||
PAGINATE_CLASS=
|
||||
22
.eslintrc.json
Normal file
22
.eslintrc.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"env": {
|
||||
"browser": true,
|
||||
"es2021": true
|
||||
},
|
||||
"extends": [
|
||||
"next/core-web-vitals",
|
||||
"eslint:recommended",
|
||||
"plugin:@typescript-eslint/recommended",
|
||||
"prettier"
|
||||
],
|
||||
"parser": "@typescript-eslint/parser",
|
||||
"parserOptions": {
|
||||
"ecmaVersion": "latest",
|
||||
"sourceType": "module"
|
||||
},
|
||||
"plugins": ["@typescript-eslint"],
|
||||
"rules": {
|
||||
"@typescript-eslint/no-unused-vars": "error",
|
||||
"@typescript-eslint/consistent-type-definitions": ["error", "type"]
|
||||
}
|
||||
}
|
||||
4
.husky/commit-msg
Executable file
4
.husky/commit-msg
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env sh
. "$(dirname -- "$0")/_/husky.sh"

# Lint the commit message with commitlint. The hook's first argument is handed
# to `--edit`; it is quoted so that a value containing spaces or glob
# characters is still passed as a single, unmodified argument.
npx --no-install commitlint --edit "$1"
|
||||
7
.husky/pre-commit
Executable file
7
.husky/pre-commit
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env sh
. "$(dirname -- "$0")/_/husky.sh"

# Checks run before every commit, in this order. Each command is a script or
# built-in of the project's yarn setup.

# Report known vulnerabilities in the installed dependencies.
yarn audit
# Rewrite the TypeScript sources with Prettier (the script uses `--write`).
yarn format
# Run ESLint over the TypeScript files (the script uses `--fix`).
yarn lint
# Type-check without emitting output (`tsc --noEmit`).
yarn typecheck
|
||||
9
.prettierrc
Normal file
9
.prettierrc
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"semi": true,
|
||||
"trailingComma": "none",
|
||||
"singleQuote": true,
|
||||
"printWidth": 80,
|
||||
"jsxSingleQuote": true,
|
||||
"tabWidth": 2,
|
||||
"arrowParens": "avoid"
|
||||
}
|
||||
12
Dockerfile
Normal file
12
Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
# Image for the scraper: installs the dependencies, copies the sources and, on
# start, generates the Prisma client, applies the migrations and runs the app.
FROM node:18 AS builder

WORKDIR /app

# Copy the manifests first so the dependency layer is reused until they change.
COPY package*.json yarn.lock ./

# devDependencies are installed on purpose: `yarn start` runs `ts-node`, which
# (like `typescript`) is declared under devDependencies in package.json. A
# `--production` install would leave the container without the binary it
# needs to start.
RUN yarn install --frozen-lockfile

COPY . .

EXPOSE 3000

# bash is used only to chain the three yarn scripts; each must succeed before
# the next one runs.
CMD ["bash", "-c", "yarn db:generate && yarn db:migrate && yarn start"]
|
||||
72
README.md
72
README.md
@@ -1 +1,71 @@
|
||||
# xray-scraper
|
||||
# xray-scrap-test
|
||||
|
||||
## Setup
|
||||
|
||||
Copy `.env.example` to `.env` and fill in the values, then run docker-compose to start (the database is exposed on port 5432 on localhost, and the credentials are in `.env`)
|
||||
|
||||
```bash
|
||||
docker-compose up --build
|
||||
```
|
||||
|
||||
## Commands
|
||||
|
||||
Run in development mode
|
||||
|
||||
```bash
|
||||
yarn dev
|
||||
```
|
||||
|
||||
Run in production mode
|
||||
|
||||
```bash
|
||||
yarn start
|
||||
```
|
||||
|
||||
Lint the code
|
||||
|
||||
```bash
|
||||
yarn lint
|
||||
```
|
||||
|
||||
Type check the code
|
||||
|
||||
```bash
|
||||
yarn typecheck
|
||||
```
|
||||
|
||||
Format the code
|
||||
|
||||
```bash
|
||||
yarn format
|
||||
```
|
||||
|
||||
Install Git hooks
|
||||
|
||||
```bash
|
||||
yarn prepare
|
||||
```
|
||||
|
||||
Run the migrations
|
||||
|
||||
```bash
|
||||
yarn db:migrate
|
||||
```
|
||||
|
||||
Add a new migration
|
||||
|
||||
```bash
|
||||
yarn db:add-migration
|
||||
```
|
||||
|
||||
Generate the prisma client
|
||||
|
||||
```bash
|
||||
yarn db:generate
|
||||
```
|
||||
|
||||
Wipe the database
|
||||
|
||||
```bash
|
||||
yarn db:reset
|
||||
```
|
||||
|
||||
1
commitlint.config.ts
Normal file
1
commitlint.config.ts
Normal file
@@ -0,0 +1 @@
|
||||
// commitlint configuration: commit messages are checked against the shared
// `@commitlint/config-conventional` rule set, with no local overrides.
module.exports = { extends: ['@commitlint/config-conventional'] };
|
||||
42
docker-compose.yml
Normal file
42
docker-compose.yml
Normal file
@@ -0,0 +1,42 @@
|
||||
version: '3.8'

services:
  # The scraper itself, built from the Dockerfile in this directory.
  backend:
    build:
      context: ./
      dockerfile: Dockerfile
    image: backend
    container_name: backend
    # Quoted on purpose: a bare `no` is read as a YAML boolean (false), while
    # the restart policy must be the string "no".
    restart: 'no'
    ports:
      - '3000:3000'
    environment:
      - PORT=3000
      # Built from the same credentials the postgres service is started with;
      # the host name `postgres` is the service name below.
      - DATABASE_URL=postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}
      - POSTGRES_USER=${POSTGRES_USER}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
      - POSTGRES_DB=${POSTGRES_DB}
      # Scraper target and selectors, passed through unchanged from .env.
      - WEBSITE_URL=${WEBSITE_URL}
      - ANCHOR_ELEMENT=${ANCHOR_ELEMENT}
      - ITERATIVE_CLASS=${ITERATIVE_CLASS}
      - NAME_CLASS=${NAME_CLASS}
      - AREAS_CLASS=${AREAS_CLASS}
      - URL_CLASS=${URL_CLASS}
      - PAGINATE_CLASS=${PAGINATE_CLASS}
    # NOTE(review): the short form only orders container start-up; it does not
    # wait for PostgreSQL to accept connections - confirm the migrations run at
    # container start tolerate that.
    depends_on:
      - postgres
  postgres:
    image: postgres:latest
    container_name: postgres
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    ports:
      - '5432:5432'
    volumes:
      # Named volume so the database files outlive the container.
      - pgdata:/var/lib/postgresql/data

volumes:
  pgdata:
|
||||
53
package.json
Normal file
53
package.json
Normal file
@@ -0,0 +1,53 @@
|
||||
{
|
||||
"name": "next-newsletter",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "nodemon --watch 'src/**/*.ts' --exec 'ts-node' src/index.ts",
|
||||
"start": "ts-node src/index.ts",
|
||||
"lint": "eslint --ext .ts . --fix",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"format": "prettier --config .prettierrc 'src/**/*.ts' --write",
|
||||
"prepare": "husky install",
|
||||
"db:add-migration": "npx prisma migrate dev",
|
||||
"db:migrate": "prisma migrate deploy",
|
||||
"db:generate": "prisma generate",
|
||||
"db:reset": "prisma migrate reset --force"
|
||||
},
|
||||
"dependencies": {
|
||||
"@prisma/client": "^5.6.0",
|
||||
"prisma": "^5.6.0",
|
||||
"x-ray": "^2.3.4",
|
||||
"zod": "^3.22.4",
|
||||
"zod-validation-error": "^1.5.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@commitlint/cli": "^17.6.6",
|
||||
"@commitlint/config-conventional": "^17.6.6",
|
||||
"@types/node": "^20",
|
||||
"@types/x-ray": "^2.3.6",
|
||||
"@typescript-eslint/eslint-plugin": "^6.0.0",
|
||||
"@typescript-eslint/parser": "^6.0.0",
|
||||
"eslint": "^8",
|
||||
"eslint-config-next": "14.0.3",
|
||||
"eslint-config-prettier": "^8.8.0",
|
||||
"husky": "^8.0.0",
|
||||
"lint-staged": "^13.2.3",
|
||||
"nodemon": "^3.0.1",
|
||||
"prettier": "^3.0.0",
|
||||
"ts-node": "^10.9.1",
|
||||
"typescript": "^5.1.6"
|
||||
},
|
||||
"resolutions": {
|
||||
"nth-check": "^2.0.1",
|
||||
"debug": "^4.3.1"
|
||||
},
|
||||
"lint-staged": {
|
||||
"*.ts": [
|
||||
"eslint --quiet --fix"
|
||||
],
|
||||
"*.{json,ts}": [
|
||||
"prettier --write --ignore-unknown"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
-- CreateTable
|
||||
CREATE TABLE "Record" (
|
||||
"id" TEXT NOT NULL,
|
||||
"datetime" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
"name" TEXT NOT NULL,
|
||||
"areas" TEXT,
|
||||
"url" TEXT NOT NULL,
|
||||
|
||||
CONSTRAINT "Record_pkey" PRIMARY KEY ("id")
|
||||
);
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Record_url_key" ON "Record"("url");
|
||||
3
prisma/migrations/migration_lock.toml
Normal file
3
prisma/migrations/migration_lock.toml
Normal file
@@ -0,0 +1,3 @@
|
||||
# Please do not edit this file manually
|
||||
# It should be added in your version-control system (i.e. Git)
|
||||
provider = "postgresql"
|
||||
19
prisma/schema.prisma
Normal file
19
prisma/schema.prisma
Normal file
@@ -0,0 +1,19 @@
|
||||
// This is your Prisma schema file,
|
||||
// learn more about it in the docs: https://pris.ly/d/prisma-schema
|
||||
|
||||
// Client generator: `prisma generate` emits the JavaScript/TypeScript client.
generator client {
  provider = "prisma-client-js"
}
|
||||
|
||||
// PostgreSQL datasource; the connection string is read from the DATABASE_URL
// environment variable.
datasource db {
  provider = "postgresql"
  url = env("DATABASE_URL")
}
|
||||
|
||||
// One stored entry. `url` is unique, so it can serve as the lookup key for a
// single row.
model Record {
  // Primary key; defaults to a generated UUID.
  id String @id @default(uuid())
  // Defaults to the time the row is created.
  datetime DateTime @default(now())
  name String
  // Optional.
  areas String?
  url String @unique
}
|
||||
73
src/index.ts
Normal file
73
src/index.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
import Xray from 'x-ray';
|
||||
import { z } from 'zod';
|
||||
import { fromZodError } from 'zod-validation-error';
|
||||
|
||||
const prisma = new PrismaClient();
const x = Xray();

// The target URL and every selector come from the environment; refuse to start
// when any of them is missing or empty. Each variable is tested directly so
// that TypeScript narrows it to `string` for the top-level x-ray call below.
if (
  !process.env.WEBSITE_URL ||
  !process.env.ANCHOR_ELEMENT ||
  !process.env.ITERATIVE_CLASS ||
  !process.env.NAME_CLASS ||
  !process.env.AREAS_CLASS ||
  !process.env.URL_CLASS ||
  !process.env.PAGINATE_CLASS
) {
  throw new Error('Not all environment variables are defined');
}

// Shape of a single scraped entry; `areas` may be absent.
const Record = z.object({
  name: z.string(),
  areas: z.string().optional(),
  url: z.string()
});

// The scraped `items` value is validated as a list of entries.
const Records = z.array(Record);

// Removes tab and newline characters from a scraped name.
const cleanName = (name: string) => name.replace(/\t|\n/g, '');

// Scrape WEBSITE_URL: within ANCHOR_ELEMENT, collect name/areas/url for every
// ITERATIVE_CLASS match, following PAGINATE_CLASS to further pages, then
// validate the result and upsert each entry by its URL.
x(process.env.WEBSITE_URL, process.env.ANCHOR_ELEMENT, {
  items: x(process.env.ITERATIVE_CLASS, [
    {
      name: process.env.NAME_CLASS,
      areas: process.env.AREAS_CLASS,
      url: process.env.URL_CLASS
    }
  ]).paginate(process.env.PAGINATE_CLASS)
})(async (err, data) => {
  // Everything runs inside try/catch/finally so that a failure in this async
  // callback is reported instead of becoming an unhandled rejection, and the
  // database connection is always released.
  try {
    if (err) {
      console.error(err);
      process.exitCode = 1;
      return;
    }

    const safeData = Records.safeParse(data.items);
    if (!safeData.success) {
      // The validation failure lives on the parse result; `err` is always
      // falsy at this point and must not be passed to fromZodError.
      console.error(fromZodError(safeData.error));
      process.exitCode = 1;
      return;
    }

    // Clean each name once and reuse it for both the write and the log.
    const records = safeData.data.map(item => ({
      ...item,
      name: cleanName(item.name)
    }));

    // Keep one entry per URL (the last one wins) so that two upserts running
    // concurrently never target the same unique `url`.
    const recordsByUrl = new Map(
      records.map(item => [item.url, item] as const)
    );

    await Promise.all(
      [...recordsByUrl.values()].map(({ name, areas, url }) =>
        prisma.record.upsert({
          where: { url },
          create: { name, areas, url },
          update: { name, areas }
        })
      )
    );

    const names = records.map(item => item.name);
    console.log(names, `Found ${names.length} records.`);
  } catch (error) {
    console.error(error);
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
  }
});
|
||||
10
tsconfig.json
Normal file
10
tsconfig.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2018",
|
||||
"module": "CommonJS",
|
||||
"esModuleInterop": true,
|
||||
"strict": true
|
||||
},
|
||||
"include": ["src/**/*.ts"],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
Reference in New Issue
Block a user