Get DuckDuckGo results in Node. https://npmjs.com/package/node-ddg
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

156 lines
4.4 KiB

  1. import cheerio from 'cheerio';
  2. import puppeteer from 'puppeteer';
  3. import queryString from 'query-string';
  /**
   * Safe Search levels. The numeric value of each member is sent verbatim as
   * the `kp` query parameter (see https://duckduckgo.com/params).
   */
  export enum SafeSearchEnum {
    /** Safe Search disabled. */
    OFF = -2,
    /** Moderate filtering; used when the caller does not specify a level. */
    MODERATE = -1,
    /** Strict filtering. */
    ON = 1
  }
  /** A single search hit scraped from the results page. */
  export interface Result {
    /** The body of the result, i.e. the website's description snippet. */
    body: string;
    /** The title of the result, i.e. the website's title. */
    title: string;
    /** The link of the result, i.e. the URL of the website. */
    url: string;
  }
  /** Options accepted by `search`. Only `query` is mandatory. */
  export interface SearchOptions {
    /** Maximum number of results to return. */
    maxResults?: number;
    /** The text to search for. */
    query: string;
    /** Safe Search level to request. */
    safeSearch?: SafeSearchEnum;
    /** Whether to request the page over HTTPS (`true`) or HTTP (`false`). */
    secure?: boolean;
  }
  /** URL scheme prefixes, keyed by protocol name. */
  export interface Protocols {
    /** Prefix used for plain HTTP requests. */
    http: string;
    /** Prefix used for HTTPS requests. */
    https: string;
  }
  /**
   * Fallback values for every optional search setting.
   *
   * `query` is deliberately absent: it has no sensible default and must always
   * be supplied by the caller, so there is no need to fake one with `null!`.
   */
  const defaultOptions: Required<
    Pick<SearchOptions, 'maxResults' | 'safeSearch' | 'secure'>
  > = {
    maxResults: 5,
    safeSearch: SafeSearchEnum.MODERATE,
    secure: true
  };
  /** Scheme prefixes prepended to the endpoint when building the search URL. */
  const protocols: Protocols = {
    https: 'https://',
    http: 'http://'
  };
  /**
   * Host and path of the search page, without a scheme. The trailing `?` lets
   * the serialized query string be appended directly.
   */
  const endpoint = 'www.duckduckgo.com/?';
  /**
   * @function search
   * @description Returns the title, body and link of searched queries from DuckDuckGo.
   * The results page is rendered in a Puppeteer browser and the resulting markup
   * is parsed with Cheerio. The browser is always closed, whether the search
   * succeeds or fails.
   * @param args Search options
   * @param args.maxResults Max results to return, defaults to `5`
   * @param args.query Search query, required
   * @param args.safeSearch Safe Search setting as defined in https://duckduckgo.com/params, defaults to `MODERATE`
   * @param args.secure Whether to use HTTPS or HTTP, defaults to `true`
   * @returns The scraped results, at most `maxResults` of them. Entries whose
   * title, snippet or link cannot be found in the markup are skipped.
   * @throws {Error} If the query is empty or missing, or if the page reports no results.
   * @license MIT
   */
  export async function search(args: SearchOptions): Promise<Result[]> {
    // Reject empty or missing queries before paying the cost of launching a browser
    if (!args.query) {
      throw new Error('The search query cannot be empty.');
    }

    // Start from the defaults and let the caller's options override them
    const options: SearchOptions = {...defaultOptions, ...args};
    // An explicitly undefined limit means "no limit", without resorting to `!`
    const maxResults =
      typeof options.maxResults === 'number' ? options.maxResults : Infinity;

    // For parameter explanations, see https://duckduckgo.com/params
    const parameters = queryString.stringify({
      kd: -1,
      kp: options.safeSearch,
      kz: -1,
      q: options.query
    });
    const scheme = options.secure ? protocols.https : protocols.http;
    const searchUrl = `${scheme}${endpoint}${parameters}`;

    const browser: puppeteer.Browser = await puppeteer.launch({
      args: ['--no-sandbox'],
      timeout: 60000
    });

    let html: string[];
    try {
      const page: puppeteer.Page = await browser.newPage();
      await page.goto(searchUrl);
      // Wait for the web content wrapper to load, then give the results a moment to render
      await page.waitForSelector('#web_content_wrapper');
      await page.waitFor(250);
      // Get the results HTML. Istanbul is told to ignore the function inside
      // page.evaluate because Puppeteer executes it outside Node.
      html = await page.evaluate(
        /* istanbul ignore next */ () => {
          // eslint-disable-next-line no-undef
          return [...document.querySelectorAll('#links')].map(
            (value: Element) => value.innerHTML
          );
        }
      );
    } finally {
      // Everything needed is now plain strings, so release the browser even
      // when navigation or evaluation failed
      await browser.close();
    }

    // A missing results container is treated the same as an explicit "no results" banner
    const nothingFound =
      html.length === 0 ||
      html.some((chunk: string) => chunk.includes('No results found for'));
    if (nothingFound) {
      if (options.safeSearch === SafeSearchEnum.ON) {
        throw new Error('No results were found because SafeSearch is enabled.');
      }
      throw new Error(`No results were found with query: ${options.query}`);
    }

    const results: Result[] = [];
    for (const chunk of html) {
      if (results.length >= maxResults) {
        break;
      }
      // Load the HTML into Cheerio (jQuery but for Node)
      const $ = cheerio.load(chunk, {normalizeWhitespace: true});
      $('.result>.result__body').each(
        (_index: number, element: CheerioElement): boolean => {
          // Returning false stops Cheerio's iteration once the limit is reached
          if (results.length >= maxResults) {
            return false;
          }
          const node = $(element);
          const titleHtml = node.find('.result__a').html();
          const bodyHtml = node.find('.result__snippet').html();
          const href = node.find('.result__url').attr('href');
          // Skip entries that lack any of the three parts instead of crashing on them
          if (
            typeof titleHtml !== 'string' ||
            typeof bodyHtml !== 'string' ||
            typeof href !== 'string'
          ) {
            return true;
          }
          results.push({
            // Highlighted terms arrive as <b>…</b>; convert them to Markdown bold
            body: bodyHtml.replace(/(<b>|<\/b>)/g, '**').trim(),
            title: titleHtml.replace(/(<b>|<\/b>)/g, '**').trim(),
            url: href.trim()
          });
          return true;
        }
      );
    }
    return results;
  }
  // Expose `search` as the module's default export using ES module syntax, so
  // the export is typed and does not rely on the CommonJS `exports` object.
  export default search;